From e9022348727f8846c78d112da34a36e48eadce50 Mon Sep 17 00:00:00 2001 From: Remco Verhoef Date: Sun, 5 Jun 2022 09:08:41 +0200 Subject: [PATCH 1/5] Adding initial version of ballista-python --- .github/workflows/ballista-python.build.yml | 127 ++ .github/workflows/ballista-python.test.yaml | 106 ++ ballista-python/.dockerignore | 19 + ballista-python/.gitignore | 173 ++ ballista-python/CHANGELOG.md | 86 + ballista-python/Cargo.lock | 1607 +++++++++++++++++ ballista-python/Cargo.toml | 51 + ballista-python/LICENSE.txt | 202 +++ ballista-python/README.md | 105 ++ ballista-python/ballista/__init__.py | 120 ++ ballista-python/ballista/functions.py | 23 + ballista-python/ballista/tests/__init__.py | 16 + ballista-python/ballista/tests/conftest.py | 33 + ballista-python/ballista/tests/generic.py | 87 + .../ballista/tests/test_aggregation.py | 48 + .../ballista/tests/test_catalog.py | 40 + .../ballista/tests/test_context.py | 74 + .../ballista/tests/test_dataframe.py | 199 ++ .../ballista/tests/test_functions.py | 219 +++ .../ballista/tests/test_imports.py | 69 + .../ballista/tests/test_indexing.py | 55 + ballista-python/ballista/tests/test_sql.py | 245 +++ ballista-python/ballista/tests/test_udaf.py | 136 ++ ballista-python/dev/create_license.py | 252 +++ ballista-python/pyproject.toml | 58 + ballista-python/requirements-310.txt | 213 +++ ballista-python/requirements-37.txt | 268 +++ ballista-python/requirements.in | 27 + ballista-python/src/ballista_context.rs | 125 ++ ballista-python/src/dataframe.rs | 163 ++ ballista-python/src/errors.rs | 95 + ballista-python/src/expression.rs | 137 ++ ballista-python/src/functions.rs | 338 ++++ ballista-python/src/lib.rs | 48 + ballista-python/src/utils.rs | 30 + 35 files changed, 5594 insertions(+) create mode 100644 .github/workflows/ballista-python.build.yml create mode 100644 .github/workflows/ballista-python.test.yaml create mode 100644 ballista-python/.dockerignore create mode 100644 ballista-python/.gitignore create mode 100644 ballista-python/CHANGELOG.md create mode 100644 ballista-python/Cargo.lock create mode 100644 ballista-python/Cargo.toml create mode 100644 ballista-python/LICENSE.txt create mode 100644 ballista-python/README.md create mode 100644 ballista-python/ballista/__init__.py create mode 100644 ballista-python/ballista/functions.py create mode 100644 ballista-python/ballista/tests/__init__.py create mode 100644 ballista-python/ballista/tests/conftest.py create mode 100644 ballista-python/ballista/tests/generic.py create mode 100644 ballista-python/ballista/tests/test_aggregation.py create mode 100644 ballista-python/ballista/tests/test_catalog.py create mode 100644 ballista-python/ballista/tests/test_context.py create mode 100644 ballista-python/ballista/tests/test_dataframe.py create mode 100644 ballista-python/ballista/tests/test_functions.py create mode 100644 ballista-python/ballista/tests/test_imports.py create mode 100644 ballista-python/ballista/tests/test_indexing.py create mode 100644 ballista-python/ballista/tests/test_sql.py create mode 100644 ballista-python/ballista/tests/test_udaf.py create mode 100644 ballista-python/dev/create_license.py create mode 100644 ballista-python/pyproject.toml create mode 100644 ballista-python/requirements-310.txt create mode 100644 ballista-python/requirements-37.txt create mode 100644 ballista-python/requirements.in create mode 100644 ballista-python/src/ballista_context.rs create mode 100644 ballista-python/src/dataframe.rs create mode 100644 ballista-python/src/errors.rs create mode 
100644 ballista-python/src/expression.rs create mode 100644 ballista-python/src/functions.rs create mode 100644 ballista-python/src/lib.rs create mode 100644 ballista-python/src/utils.rs diff --git a/.github/workflows/ballista-python.build.yml b/.github/workflows/ballista-python.build.yml new file mode 100644 index 000000000..e788ed3ae --- /dev/null +++ b/.github/workflows/ballista-python.build.yml @@ -0,0 +1,127 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Python Release Build +on: + push: + tags: + - "*-rc*" + +jobs: + generate-license: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Generate license file + run: python ./dev/create_license.py + - uses: actions/upload-artifact@v2 + with: + name: python-wheel-license + path: LICENSE.txt + + build-python-mac-win: + needs: [generate-license] + name: Mac/Win + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + os: [macos-latest, windows-latest] + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions-rs/toolchain@v1 + with: + toolchain: stable + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install maturin==0.12.16 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: . + + - name: Build Python package + run: maturin build --release --strip --cargo-extra-args="--locked" + + - name: List Windows wheels + if: matrix.os == 'windows-latest' + run: dir target\wheels\ + + - name: List Mac wheels + if: matrix.os != 'windows-latest' + run: find target/wheels/ + + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: target/wheels/* + + build-manylinux: + needs: [generate-license] + name: Manylinux + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: . 
+ - run: cat LICENSE.txt + - name: Build wheels + run: | + export RUSTFLAGS='-C target-cpu=skylake' + docker run --rm -v $(pwd):/io \ + --workdir /io \ + konstin2/maturin:v0.12.16 \ + build --release --manylinux 2010 --cargo-extra-args="--locked" + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: target/wheels/* + + # NOTE: publishing to PyPI needs to be done manually for now, after the release has passed the vote + # release: + # name: Publish to PyPI + # needs: [build-manylinux, build-python-mac-win] + # runs-on: ubuntu-latest + # steps: + # - uses: actions/download-artifact@v2 + # - name: Publish to PyPI + # uses: pypa/gh-action-pypi-publish@master + # with: + # user: __token__ + # password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/ballista-python.test.yaml b/.github/workflows/ballista-python.test.yaml new file mode 100644 index 000000000..40872ef45 --- /dev/null +++ b/.github/workflows/ballista-python.test.yaml @@ -0,0 +1,106 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +name: Python test +on: + push: + branches: [main] + pull_request: + branches: [main] + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +jobs: + test-matrix: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: + - "3.10" + toolchain: + - "stable" + - "beta" + # we are not eager to live on the bleeding edge yet + # - nightly + # build only the stable toolchain for Python 3.7 + include: + - python-version: "3.7" + toolchain: "stable" + steps: + - uses: actions/checkout@v2 + + - name: Setup Rust Toolchain + uses: actions-rs/toolchain@v1 + id: rust-toolchain + with: + toolchain: ${{ matrix.toolchain }} + override: true + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache Cargo + uses: actions/cache@v2 + with: + path: ~/.cargo + key: cargo-cache-${{ steps.rust-toolchain.outputs.rustc_hash }}-${{ hashFiles('Cargo.lock') }} + + - name: Check Formatting + uses: actions-rs/cargo@v1 + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + with: + command: fmt + args: -- --check + + - name: Run Clippy + uses: actions-rs/cargo@v1 + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + with: + command: clippy + args: --all-targets --all-features -- -D clippy::all + + - name: Create Virtualenv (3.10) + if: ${{ matrix.python-version == '3.10' }} + run: | + python -m venv venv + source venv/bin/activate + pip install -r requirements-310.txt + + - name: Create Virtualenv (3.7) + if: ${{ matrix.python-version == '3.7' }} + run: | + python -m venv venv + source venv/bin/activate + pip install -r requirements-37.txt + + - name: Run Python Linters + if: ${{ matrix.python-version == '3.10' && matrix.toolchain == 'stable' }} + run: | + source venv/bin/activate + flake8 --exclude venv --ignore=E501 + black --line-length 79 --diff --check . + + - name: Run tests + run: | + source venv/bin/activate + maturin develop --cargo-extra-args='--locked' + RUST_BACKTRACE=1 pytest -v . diff --git a/ballista-python/.dockerignore b/ballista-python/.dockerignore new file mode 100644 index 000000000..08c131c2e --- /dev/null +++ b/ballista-python/.dockerignore @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +target +venv diff --git a/ballista-python/.gitignore b/ballista-python/.gitignore new file mode 100644 index 000000000..050f1edbf --- /dev/null +++ b/ballista-python/.gitignore @@ -0,0 +1,173 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# Created by https://www.toptal.com/developers/gitignore/api/python,rust +# Edit at https://www.toptal.com/developers/gitignore?templates=python,rust + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +### Rust ### +# Generated by Cargo +# will have compiled files and executables +debug/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb + +# End of https://www.toptal.com/developers/gitignore/api/python,rust diff --git a/ballista-python/CHANGELOG.md b/ballista-python/CHANGELOG.md new file mode 100644 index 000000000..a27ad0adb --- /dev/null +++ b/ballista-python/CHANGELOG.md @@ -0,0 +1,86 @@ + + +# Changelog + +## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) + +**Merged pull requests:** + +- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) +- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) +- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) +- update readme and changelog [\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) +- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) +- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) +- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) +- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) +- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) +- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) +- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) +- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) +- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) +- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) +- update test workflow 
to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) +- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) +- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) ([Jimexist](https://github.com/Jimexist)) +- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) +- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) +- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) +- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) +- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) +- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) +- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) + +## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) + +## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) + +## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) + +## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) + +**Closed issues:** + +- Add support for Ballista [\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) +- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) + +## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) + +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) + +**Closed issues:** + +- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) +- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) +- Why is pandas a requirement? 
[\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) +- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) +- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) + +\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/ballista-python/Cargo.lock b/ballista-python/Cargo.lock new file mode 100644 index 000000000..e71932b3b --- /dev/null +++ b/ballista-python/Cargo.lock @@ -0,0 +1,1607 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.6", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "arrow" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" +dependencies = [ + "bitflags", + "chrono", + "comfy-table", + "csv", + "flatbuffers", + "half", + "hex", + "indexmap", + "lazy_static", + "lexical-core", + "multiversion", + "num", + "pyo3", + "rand 0.8.5", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "async-trait" +version = "0.1.53" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" 
+version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "blake2" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] +name = "block-buffer" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "winapi", +] + +[[package]] +name = "comfy-table" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + +[[package]] +name = "cpufeatures" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-common" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa 0.4.8", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "datafusion" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8174c458d0266ba442038160fad2c98f02924e6179d6d46175f600b69abb5bb7" +dependencies = [ + "ahash", + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-data-access", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-row", + "futures", + "hashbrown 0.12.1", + "lazy_static", + "log", + "num_cpus", + "ordered-float 3.0.0", + "parking_lot", + "parquet", + "paste", + "pin-project-lite", + "pyo3", + "rand 0.8.5", + "smallvec", + "sqlparser", + "tempfile", + "tokio", + "tokio-stream", + "uuid 1.0.0", +] + +[[package]] +name = "datafusion-common" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c687e12a58d1499b19ee899fa467d122c8cc9223570af6f3eb3c4d9d9be929b" +dependencies = [ + "arrow", + "ordered-float 3.0.0", + "parquet", + "pyo3", + "sqlparser", +] + +[[package]] +name = "datafusion-data-access" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cca9e1200ddd362d97f22d185e98995ebd3737d2b3d69a200a06a5099fa699a" +dependencies = [ + "async-trait", + "chrono", + "futures", + "glob", + "parking_lot", + "tempfile", + "tokio", +] + +[[package]] +name = "datafusion-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d20909d70931b88605b6f121c0ed820cec1d5b802cb51b7b5759f0421be3add8" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "sqlparser", +] + +[[package]] +name = "datafusion-physical-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9083cb20b57216430d5b8e52341782d9520ce77e65c60803e330fd09bdfa6e" +dependencies = [ + "ahash", + "arrow", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-row", + "hashbrown 0.12.1", + "lazy_static", + "md-5", + "ordered-float 3.0.0", + "paste", + "rand 0.8.5", + "regex", + "sha2", + "unicode-segmentation", +] + +[[package]] +name = "datafusion-python" +version = "0.6.0" +dependencies = [ + "datafusion", + "datafusion-common", + "datafusion-expr", + "mimalloc", + "pyo3", + "rand 0.7.3", + "tokio", + "uuid 0.8.2", +] + +[[package]] +name = "datafusion-row" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f649a2021eefef44e0bef6782d053148b2fef74db7592fecfe87e042d52947a" +dependencies = [ + "arrow", + 
"datafusion-common", + "paste", + "rand 0.8.5", +] + +[[package]] +name = "digest" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "flatbuffers" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" +dependencies = [ + "bitflags", + "smallvec", + "thiserror", +] + +[[package]] +name = "flate2" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "futures" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" + +[[package]] +name = "futures-executor" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" + +[[package]] +name = "futures-macro" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" + +[[package]] +name = "futures-task" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" + +[[package]] +name = "futures-util" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + 
+[[package]] +name = "generic-array" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "hashbrown" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" +dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "indexmap" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" +dependencies = [ + "autocfg", + "hashbrown 0.11.2", +] + +[[package]] +name = "indoc" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "integer-encoding" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lz4" +version = "1.23.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" +dependencies = [ + "libc", + 
"lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "md-5" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" +dependencies = [ + "digest", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "mimalloc" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "miniz_oxide" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" +dependencies = [ + "adler", +] + +[[package]] +name = "multiversion" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" +dependencies = [ + "multiversion-macros", +] + +[[package]] +name = "multiversion-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "3.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "parquet" +version = "13.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4" +dependencies = [ + "arrow", + "base64", + "brotli", + "byteorder", + "chrono", + "flate2", + "lz4", + "num", + "num-bigint", + "parquet-format", + "rand 0.8.5", + "snap", + "thrift", + "zstd", +] + +[[package]] +name = "parquet-format" +version = "4.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" +dependencies = [ + "thrift", +] + +[[package]] +name = "paste" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + +[[package]] +name = "proc-macro2" +version = "1.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" + +[[package]] +name = "serde_derive" +version = "1.0.137" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" +dependencies = [ + "indexmap", + "itoa 1.0.2", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "slab" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" + +[[package]] +name = "smallvec" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" + +[[package]] +name = "snap" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" + +[[package]] +name = "sqlparser" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddc2739f3a9bfc68e2f7b7695589f6cb0181c88af73ceaee0c84215cd2a2ae28" +dependencies = [ + "log", +] + 
+[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + +[[package]] +name = "syn" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fa7e55043acb85fca6b3c01485a2eeb6b69c5d21002e273c79e465f43b7ac1" + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float 1.1.1", + "threadpool", +] + +[[package]] +name = "tokio" +version = "1.18.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4903bf0427cf68dddd5aa6a93220756f8be0c34fcfa9f5e6191e103e15a31395" +dependencies = [ + "num_cpus", + "once_cell", + "parking_lot", + "pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "unicode-ident" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" + +[[package]] +name = "unicode-segmentation" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unindent" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "uuid" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0" +dependencies = [ + "getrandom 0.2.6", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "zstd" +version = "0.11.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "5.0.2+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.1+zstd.1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" +dependencies = [ + "cc", + "libc", +] diff --git a/ballista-python/Cargo.toml b/ballista-python/Cargo.toml new file mode 100644 index 000000000..c9edb2d6f --- /dev/null +++ b/ballista-python/Cargo.toml @@ -0,0 +1,51 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[package] +name = "ballista-python" +version = "0.6.0" +homepage = "https://github.com/apache/arrow" +repository = "https://github.com/apache/arrow" +authors = ["Apache Arrow "] +description = "Build and run queries against data" +readme = "README.md" +license = "Apache-2.0" +edition = "2021" +rust-version = "1.57" + +[dependencies] +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } +rand = "0.7" +pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } +datafusion = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features=["pyarrow"]} +datafusion-expr = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710"} +datafusion-common = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710"} +uuid = { version = "0.8", features = ["v4"] } + +mimalloc = { version = "*", default-features = false } +ballista = { git = "https://github.com/apache/arrow-ballista", rev = "9e78b8ecd55cc3e9d46387c87098c4e6625294eb"} + +[lib] +name = "ballista_python" +crate-type = ["cdylib", "rlib"] + +[package.metadata.maturin] +name = "ballista._internal" + +[profile.release] +lto = true +codegen-units = 1 diff --git a/ballista-python/LICENSE.txt b/ballista-python/LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/ballista-python/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/ballista-python/README.md b/ballista-python/README.md new file mode 100644 index 000000000..a9d7b873e --- /dev/null +++ b/ballista-python/README.md @@ -0,0 +1,105 @@ + + +# Ballista in Python + +[![Python test](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml/badge.svg)](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml) + +This is a Python library that binds to [Apache Arrow](https://arrow.apache.org/) in-memory query engine [DataFusion](https://github.com/apache/arrow-datafusion). + +Like pyspark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python. 
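+
+For the DataFrame-style flavour mentioned above, here is a minimal sketch (it assumes the
+`DataFrame` returned by `ctx.sql` exposes `select`/`filter` and that `col`/`lit` and the
+comparison operators behave as in datafusion-python):
+
+```python
+import ballista
+from ballista import col, lit
+
+ctx = ballista.BallistaContext(host="host.docker.internal")
+ctx.register_parquet("table", "/data")
+
+# start from a SQL scan, then refine it with the (assumed) DataFrame API
+df = ctx.sql("SELECT a, b FROM table")
+df = df.filter(col("a") > lit(1)).select(col("a"), col("b"))
+df.show()
+```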
+ +The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations. + +Its query engine, DataFusion, is written in [Rust](https://www.rust-lang.org/), which makes strong assumptions about thread safety and lack of memory leaks. + +Technically, zero-copy is achieved via the [c data interface](https://arrow.apache.org/docs/format/CDataInterface.html). + +## How to use it + +Simple usage: + +```python +import ballista +ctx = ballista.BallistaContext(host="host.docker.internal") +ctx.register_parquet("table", "/data") +df = ctx.sql("SELECT * FROM table LIMIT 10") +df.show() +``` + +## How to install (from pip) + +```bash +pip install ballista +# or +python -m pip install ballista +``` + +You can verify the installation by running: + +```python +>>> import ballista +>>> ballista.__version__ +'0.6.0' +``` + +## How to develop + +This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). + +Bootstrap: + +```bash +# fetch this repo +git clone git@github.com:datafusion-contrib/datafusion-python.git +# prepare development environment (used to build wheel / install in development) +python3 -m venv venv +# activate the venv +source venv/bin/activate +# update pip itself if necessary +python -m pip install -U pip +# install dependencies (for Python 3.8+) +python -m pip install -r requirements-310.txt +``` + +Whenever rust code changes (your changes or via `git pull`): + +```bash +# make sure you activate the venv using "source venv/bin/activate" first +maturin develop +python -m pytest +``` + +## How to update dependencies + +To change test dependencies, change the `requirements.in` and run + +```bash +# install pip-tools (this can be done only once), also consider running in venv +python -m pip install pip-tools +python -m piptools compile --generate-hashes -o requirements-310.txt +``` + +To update dependencies, run with `-U` + +```bash +python -m piptools compile -U --generate-hashes -o requirements-310.txt +``` + +More details [here](https://github.com/jazzband/pip-tools) diff --git a/ballista-python/ballista/__init__.py b/ballista-python/ballista/__init__.py new file mode 100644 index 000000000..96108f9c6 --- /dev/null +++ b/ballista-python/ballista/__init__.py @@ -0,0 +1,120 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from abc import ABCMeta, abstractmethod +from typing import List + +try: + import importlib.metadata as importlib_metadata +except ImportError: + import importlib_metadata + + +import datafusion + +import pyarrow as pa + +from ._internal import ( + DataFrame, + BallistaContext, + Expression, +) + + +__version__ = importlib_metadata.version(__name__) + + +__all__ = [ + "DataFrame", + "BallistaContext", + "Expression", + "AggregateUDF", + "ScalarUDF", + "column", + "literal", +] + + +class Accumulator(metaclass=ABCMeta): + @abstractmethod + def state(self) -> List[pa.Scalar]: + pass + + @abstractmethod + def update(self, values: pa.Array) -> None: + pass + + @abstractmethod + def merge(self, states: pa.Array) -> None: + pass + + @abstractmethod + def evaluate(self) -> pa.Scalar: + pass + + +def column(value): + return Expression.column(value) + + +col = column + + +def literal(value): + if not isinstance(value, pa.Scalar): + value = pa.scalar(value) + return Expression.literal(value) + + +lit = literal + + +def udf(func, input_types, return_type, volatility, name=None): + """ + Create a new User Defined Function + """ + if not callable(func): + raise TypeError("`func` argument must be callable") + if name is None: + name = func.__qualname__ + return datafusion.ScalarUDF( + name=name, + func=func, + input_types=input_types, + return_type=return_type, + volatility=volatility, + ) + + +def udaf(accum, input_type, return_type, state_type, volatility, name=None): + """ + Create a new User Defined Aggregate Function + """ + if not issubclass(accum, Accumulator): + raise TypeError( + "`accum` must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__qualname__ + return datafusion.AggregateUDF( + name=name, + accumulator=accum, + input_type=input_type, + return_type=return_type, + state_type=state_type, + volatility=volatility, + ) diff --git a/ballista-python/ballista/functions.py b/ballista-python/ballista/functions.py new file mode 100644 index 000000000..782ecba22 --- /dev/null +++ b/ballista-python/ballista/functions.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from ._internal import functions + + +def __getattr__(name): + return getattr(functions, name) diff --git a/ballista-python/ballista/tests/__init__.py b/ballista-python/ballista/tests/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/ballista-python/ballista/tests/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/ballista-python/ballista/tests/conftest.py b/ballista-python/ballista/tests/conftest.py new file mode 100644 index 000000000..93662a082 --- /dev/null +++ b/ballista-python/ballista/tests/conftest.py @@ -0,0 +1,33 @@ +import pytest +from datafusion import SessionContext +import pyarrow as pa + + +@pytest.fixture +def ctx(): + return SessionContext() + + +@pytest.fixture +def database(ctx, tmp_path): + path = tmp_path / "test.csv" + + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + ctx.register_csv("csv1", str(path)) + ctx.register_csv( + "csv2", + path, + has_header=True, + delimiter=",", + schema_infer_max_records=10, + ) diff --git a/ballista-python/ballista/tests/generic.py b/ballista-python/ballista/tests/generic.py new file mode 100644 index 000000000..1f984a40a --- /dev/null +++ b/ballista-python/ballista/tests/generic.py @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import datetime + +import numpy as np +import pyarrow as pa +import pyarrow.csv + +# used to write parquet files +import pyarrow.parquet as pq + + +def data(): + np.random.seed(1) + data = np.concatenate( + [ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50), + ] + ) + return pa.array(data) + + +def data_with_nans(): + np.random.seed(0) + data = np.random.normal(0, 0.01, size=50) + mask = np.random.randint(0, 2, size=50) + data[mask == 0] = np.NaN + return data + + +def data_datetime(f): + data = [ + datetime.datetime.now(), + datetime.datetime.now() - datetime.timedelta(days=1), + datetime.datetime.now() + datetime.timedelta(days=1), + ] + return pa.array( + data, type=pa.timestamp(f), mask=np.array([False, True, False]) + ) + + +def data_date32(): + data = [ + datetime.date(2000, 1, 1), + datetime.date(1980, 1, 1), + datetime.date(2030, 1, 1), + ] + return pa.array( + data, type=pa.date32(), mask=np.array([False, True, False]) + ) + + +def data_timedelta(f): + data = [ + datetime.timedelta(days=100), + datetime.timedelta(days=1), + datetime.timedelta(seconds=1), + ] + return pa.array( + data, type=pa.duration(f), mask=np.array([False, True, False]) + ) + + +def data_binary_other(): + return np.array([1, 0, 0], dtype="u4") + + +def write_parquet(path, data): + table = pa.Table.from_arrays([data], names=["a"]) + pq.write_table(table, path) + return str(path) diff --git a/ballista-python/ballista/tests/test_aggregation.py b/ballista-python/ballista/tests/test_aggregation.py new file mode 100644 index 000000000..bba1ed41d --- /dev/null +++ b/ballista-python/ballista/tests/test_aggregation.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import SessionContext, column +from datafusion import functions as f + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_built_in_aggregation(df): + col_a = column("a") + col_b = column("b") + df = df.aggregate( + [], + [f.max(col_a), f.min(col_a), f.count(col_a), f.approx_distinct(col_b)], + ) + result = df.collect()[0] + assert result.column(0) == pa.array([3]) + assert result.column(1) == pa.array([1]) + assert result.column(2) == pa.array([3], type=pa.uint64()) + assert result.column(3) == pa.array([2], type=pa.uint64()) diff --git a/ballista-python/ballista/tests/test_catalog.py b/ballista-python/ballista/tests/test_catalog.py new file mode 100644 index 000000000..a9bdf72b3 --- /dev/null +++ b/ballista-python/ballista/tests/test_catalog.py @@ -0,0 +1,40 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pyarrow as pa +import pytest + + +def test_basic(ctx, database): + with pytest.raises(KeyError): + ctx.catalog("non-existent") + + default = ctx.catalog() + assert default.names() == ["public"] + + for database in [default.database("public"), default.database()]: + assert database.names() == {"csv1", "csv", "csv2"} + + table = database.table("csv") + assert table.kind == "physical" + assert table.schema == pa.schema( + [ + pa.field("int", pa.int64(), nullable=False), + pa.field("str", pa.string(), nullable=False), + pa.field("float", pa.float64(), nullable=False), + ] + ) diff --git a/ballista-python/ballista/tests/test_context.py b/ballista-python/ballista/tests/test_context.py new file mode 100644 index 000000000..4d4a38c9d --- /dev/null +++ b/ballista-python/ballista/tests/test_context.py @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa + + +def test_register_record_batches(ctx): + # create a RecordBatch and register it as memtable + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + ctx.register_record_batches("t", [[batch]]) + + assert ctx.tables() == {"t"} + + result = ctx.sql("SELECT a+b, a-b FROM t").collect() + + assert result[0].column(0) == pa.array([5, 7, 9]) + assert result[0].column(1) == pa.array([-3, -3, -3]) + + +def test_create_dataframe_registers_unique_table_name(ctx): + # create a RecordBatch and register it as memtable + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + df = ctx.create_dataframe([[batch]]) + tables = list(ctx.tables()) + + assert df + assert len(tables) == 1 + assert len(tables[0]) == 33 + assert tables[0].startswith("c") + # ensure that the rest of the table name contains + # only hexadecimal numbers + for c in tables[0][1:]: + assert c in "0123456789abcdef" + + +def test_register_table(ctx, database): + default = ctx.catalog() + public = default.database("public") + assert public.names() == {"csv", "csv1", "csv2"} + table = public.table("csv") + + ctx.register_table("csv3", table) + assert public.names() == {"csv", "csv1", "csv2", "csv3"} + + +def test_deregister_table(ctx, database): + default = ctx.catalog() + public = default.database("public") + assert public.names() == {"csv", "csv1", "csv2"} + + ctx.deregister_table("csv") + assert public.names() == {"csv1", "csv2"} diff --git a/ballista-python/ballista/tests/test_dataframe.py b/ballista-python/ballista/tests/test_dataframe.py new file mode 100644 index 000000000..43c260ae2 --- /dev/null +++ b/ballista-python/ballista/tests/test_dataframe.py @@ -0,0 +1,199 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import functions as f +from datafusion import DataFrame, SessionContext, column, literal, udf + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +@pytest.fixture +def struct_df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([{"c": 1}, {"c": 2}, {"c": 3}]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +def test_select(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_select_columns(df): + df = df.select_columns("b", "a") + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([4, 5, 6]) + assert result.column(1) == pa.array([1, 2, 3]) + + +def test_filter(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ).filter(column("a") > literal(2)) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([9]) + assert result.column(1) == pa.array([-3]) + + +def test_sort(df): + df = df.sort(column("b").sort(ascending=False)) + + table = pa.Table.from_batches(df.collect()) + expected = {"a": [3, 2, 1], "b": [6, 5, 4]} + + assert table.to_pydict() == expected + + +def test_limit(df): + df = df.limit(1) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 + + +def test_udf(df): + # is_null is a pa function over arrays + is_null = udf( + lambda x: x.is_null(), + [pa.int64()], + pa.bool_(), + volatility="immutable", + ) + + df = df.select(is_null(column("a"))) + result = df.collect()[0].column(0) + + assert result == pa.array([False, False, False]) + + +def test_join(): + ctx = SessionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]]) + + df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") + df = df.sort(column("a").sort(ascending=True)) + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + +def test_window_lead(df): + df = df.select( + column("a"), + f.alias( + f.window( + "lead", [column("b")], order_by=[f.order_by(column("b"))] + ), + "a_next", + ), + ) + + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2, 3], "a_next": [5, 6, None]} + assert table.to_pydict() == expected + + +def test_get_dataframe(tmp_path): + ctx = SessionContext() + + path = tmp_path / "test.csv" + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + + df = ctx.table("csv") + assert isinstance(df, DataFrame) + + +def 
test_struct_select(struct_df): + df = struct_df.select( + column("a")["c"] + column("b"), + column("a")["c"] - column("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_explain(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + df.explain() diff --git a/ballista-python/ballista/tests/test_functions.py b/ballista-python/ballista/tests/test_functions.py new file mode 100644 index 000000000..daa2f1967 --- /dev/null +++ b/ballista-python/ballista/tests/test_functions.py @@ -0,0 +1,219 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import numpy as np +import pyarrow as pa +import pytest + +from datafusion import SessionContext, column +from datafusion import functions as f +from datafusion import literal + + +@pytest.fixture +def df(): + ctx = SessionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array(["Hello", "World", "!"]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_literal(df): + df = df.select( + literal(1), + literal("1"), + literal("OK"), + literal(3.14), + literal(True), + literal(b"hello world"), + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array([1] * 3) + assert result.column(1) == pa.array(["1"] * 3) + assert result.column(2) == pa.array(["OK"] * 3) + assert result.column(3) == pa.array([3.14] * 3) + assert result.column(4) == pa.array([True] * 3) + assert result.column(5) == pa.array([b"hello world"] * 3) + + +def test_lit_arith(df): + """ + Test literals with arithmetic operations + """ + df = df.select( + literal(1) + column("b"), f.concat(column("a"), literal("!")) + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array([5, 6, 7]) + assert result.column(1) == pa.array(["Hello!", "World!", "!!"]) + + +def test_math_functions(): + ctx = SessionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([0.1, -0.7, 0.55])], names=["value"] + ) + df = ctx.create_dataframe([[batch]]) + + values = np.array([0.1, -0.7, 0.55]) + col_v = column("value") + df = df.select( + f.abs(col_v), + f.sin(col_v), + f.cos(col_v), + f.tan(col_v), + f.asin(col_v), + f.acos(col_v), + f.exp(col_v), + f.ln(col_v + literal(pa.scalar(1))), + f.log2(col_v + literal(pa.scalar(1))), + f.log10(col_v + literal(pa.scalar(1))), + f.random(), + ) + batches = df.collect() + assert len(batches) == 1 + result = batches[0] + + np.testing.assert_array_almost_equal(result.column(0), np.abs(values)) + 
np.testing.assert_array_almost_equal(result.column(1), np.sin(values)) + np.testing.assert_array_almost_equal(result.column(2), np.cos(values)) + np.testing.assert_array_almost_equal(result.column(3), np.tan(values)) + np.testing.assert_array_almost_equal(result.column(4), np.arcsin(values)) + np.testing.assert_array_almost_equal(result.column(5), np.arccos(values)) + np.testing.assert_array_almost_equal(result.column(6), np.exp(values)) + np.testing.assert_array_almost_equal( + result.column(7), np.log(values + 1.0) + ) + np.testing.assert_array_almost_equal( + result.column(8), np.log2(values + 1.0) + ) + np.testing.assert_array_almost_equal( + result.column(9), np.log10(values + 1.0) + ) + np.testing.assert_array_less(result.column(10), np.ones_like(values)) + + +def test_string_functions(df): + df = df.select(f.md5(column("a")), f.lower(column("a"))) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array( + [ + "8b1a9953c4611296a827abf8c47804d7", + "f5a7924e621e84c9280a9a27e1bcb7f6", + "9033e0e305f247c0c3c80d0c7848c8b3", + ] + ) + assert result.column(1) == pa.array(["hello", "world", "!"]) + + +def test_hash_functions(df): + exprs = [ + f.digest(column("a"), literal(m)) + for m in ("md5", "sha256", "sha512", "blake2s", "blake3") + ] + df = df.select(*exprs) + result = df.collect() + assert len(result) == 1 + result = result[0] + b = bytearray.fromhex + assert result.column(0) == pa.array( + [ + b("8B1A9953C4611296A827ABF8C47804D7"), + b("F5A7924E621E84C9280A9A27E1BCB7F6"), + b("9033E0E305F247C0C3C80D0C7848C8B3"), + ] + ) + assert result.column(1) == pa.array( + [ + b( + "185F8DB32271FE25F561A6FC938B2E26" + "4306EC304EDA518007D1764826381969" + ), + b( + "78AE647DC5544D227130A0682A51E30B" + "C7777FBB6D8A8F17007463A3ECD1D524" + ), + b( + "BB7208BC9B5D7C04F1236A82A0093A5E" + "33F40423D5BA8D4266F7092C3BA43B62" + ), + ] + ) + assert result.column(2) == pa.array( + [ + b( + "3615F80C9D293ED7402687F94B22D58E" + "529B8CC7916F8FAC7FDDF7FBD5AF4CF7" + "77D3D795A7A00A16BF7E7F3FB9561EE9" + "BAAE480DA9FE7A18769E71886B03F315" + ), + b( + "8EA77393A42AB8FA92500FB077A9509C" + "C32BC95E72712EFA116EDAF2EDFAE34F" + "BB682EFDD6C5DD13C117E08BD4AAEF71" + "291D8AACE2F890273081D0677C16DF0F" + ), + b( + "3831A6A6155E509DEE59A7F451EB3532" + "4D8F8F2DF6E3708894740F98FDEE2388" + "9F4DE5ADB0C5010DFB555CDA77C8AB5D" + "C902094C52DE3278F35A75EBC25F093A" + ), + ] + ) + assert result.column(3) == pa.array( + [ + b( + "F73A5FBF881F89B814871F46E26AD3FA" + "37CB2921C5E8561618639015B3CCBB71" + ), + b( + "B792A0383FB9E7A189EC150686579532" + "854E44B71AC394831DAED169BA85CCC5" + ), + b( + "27988A0E51812297C77A433F63523334" + "6AEE29A829DCF4F46E0F58F402C6CFCB" + ), + ] + ) + assert result.column(4) == pa.array( + [ + b( + "FBC2B0516EE8744D293B980779178A35" + "08850FDCFE965985782C39601B65794F" + ), + b( + "BF73D18575A736E4037D45F9E316085B" + "86C19BE6363DE6AA789E13DEAACC1C4E" + ), + b( + "C8D11B9F7237E4034ADBCD2005735F9B" + "C4C597C75AD89F4492BEC8F77D15F7EB" + ), + ] + ) diff --git a/ballista-python/ballista/tests/test_imports.py b/ballista-python/ballista/tests/test_imports.py new file mode 100644 index 000000000..9522bc31a --- /dev/null +++ b/ballista-python/ballista/tests/test_imports.py @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +import ballista +from ballista import ( + AggregateUDF, + DataFrame, + SessionContext, + Expression, + ScalarUDF, + functions, +) + + +def test_import_ballista(): + assert ballista.__name__ == "ballista" + + +def test_ballista_python_version(): + assert ballista.__version__ is not None + + +def test_class_module_is_ballista(): + for klass in [ + SessionContext, + Expression, + DataFrame, + ScalarUDF, + AggregateUDF, + ]: + assert klass.__module__ == "ballista" + + +def test_import_from_functions_submodule(): + from ballista.functions import abs, sin # noqa + + assert functions.abs is abs + assert functions.sin is sin + + msg = "cannot import name 'foobar' from 'ballista.functions'" + with pytest.raises(ImportError, match=msg): + from ballista.functions import foobar # noqa + + +def test_classes_are_inheritable(): + class MyExecContext(SessionContext): + pass + + class MyExpression(Expression): + pass + + class MyDataFrame(DataFrame): + pass diff --git a/ballista-python/ballista/tests/test_indexing.py b/ballista-python/ballista/tests/test_indexing.py new file mode 100644 index 000000000..9e65c0efd --- /dev/null +++ b/ballista-python/ballista/tests/test_indexing.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import SessionContext + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_indexing(df): + assert df["a"] is not None + assert df["a", "b"] is not None + assert df[("a", "b")] is not None + assert df[["a"]] is not None + + +def test_err(df): + with pytest.raises(Exception) as e_info: + df["c"] + + assert "Schema error: No field named 'c'" in e_info.value.args[0] + + with pytest.raises(Exception) as e_info: + df[1] + + assert ( + "DataFrame can only be indexed by string index or indices" + in e_info.value.args[0] + ) diff --git a/ballista-python/ballista/tests/test_sql.py b/ballista-python/ballista/tests/test_sql.py new file mode 100644 index 000000000..cde5425a8 --- /dev/null +++ b/ballista-python/ballista/tests/test_sql.py @@ -0,0 +1,245 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import numpy as np +import pyarrow as pa +import pytest + +from datafusion import udf + +from . 
import generic as helpers + + +def test_no_table(ctx): + with pytest.raises(Exception, match="DataFusion error"): + ctx.sql("SELECT a FROM b").collect() + + +def test_register_csv(ctx, tmp_path): + path = tmp_path / "test.csv" + + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + ctx.register_csv("csv1", str(path)) + ctx.register_csv( + "csv2", + path, + has_header=True, + delimiter=",", + schema_infer_max_records=10, + ) + alternative_schema = pa.schema( + [ + ("some_int", pa.int16()), + ("some_bytes", pa.string()), + ("some_floats", pa.float32()), + ] + ) + ctx.register_csv("csv3", path, schema=alternative_schema) + + assert ctx.tables() == {"csv", "csv1", "csv2", "csv3"} + + for table in ["csv", "csv1", "csv2"]: + result = ctx.sql(f"SELECT COUNT(int) AS cnt FROM {table}").collect() + result = pa.Table.from_batches(result) + assert result.to_pydict() == {"cnt": [4]} + + result = ctx.sql("SELECT * FROM csv3").collect() + result = pa.Table.from_batches(result) + assert result.schema == alternative_schema + + with pytest.raises( + ValueError, match="Delimiter must be a single character" + ): + ctx.register_csv("csv4", path, delimiter="wrong") + + +def test_register_parquet(ctx, tmp_path): + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) + ctx.register_parquet("t", path) + assert ctx.tables() == {"t"} + + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() + result = pa.Table.from_batches(result) + assert result.to_pydict() == {"cnt": [100]} + + +def test_execute(ctx, tmp_path): + data = [1, 1, 2, 2, 3, 11, 12] + + # single column, "a" + path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) + ctx.register_parquet("t", path) + + assert ctx.tables() == {"t"} + + # count + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() + + expected = pa.array([7], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] + assert result == expected + + # where + expected = pa.array([2], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a > 10").collect() + assert result == expected + + # group by + results = ctx.sql( + "SELECT CAST(a as int) AS a, COUNT(a) AS cnt FROM t GROUP BY a" + ).collect() + + # group by returns batches + result_keys = [] + result_values = [] + for result in results: + pydict = result.to_pydict() + result_keys.extend(pydict["a"]) + result_values.extend(pydict["cnt"]) + + result_keys, result_values = ( + list(t) for t in zip(*sorted(zip(result_keys, result_values))) + ) + + assert result_keys == [1, 2, 3, 11, 12] + assert result_values == [2, 2, 1, 1, 1] + + # order by + result = ctx.sql( + "SELECT a, CAST(a AS int) AS a_int FROM t ORDER BY a DESC LIMIT 2" + ).collect() + expected_a = pa.array([50.0219, 50.0152], pa.float64()) + expected_cast = pa.array([50, 50], pa.int32()) + expected = [ + pa.RecordBatch.from_arrays([expected_a, expected_cast], ["a", "a_int"]) + ] + np.testing.assert_equal(expected[0].column(1), expected[0].column(1)) + + +def test_cast(ctx, tmp_path): + """ + Verify that we can cast + """ + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) + ctx.register_parquet("t", path) + + valid_types = [ + "smallint", + "int", + "bigint", + "float(32)", + "float(64)", + "float", + ] + + select = ", ".join( + [f"CAST(9 AS {t}) AS A{i}" for i, t in 
enumerate(valid_types)] + ) + + # can execute, which implies that we can cast + ctx.sql(f"SELECT {select} FROM t").collect() + + +@pytest.mark.parametrize( + ("fn", "input_types", "output_type", "input_values", "expected_values"), + [ + ( + lambda x: x, + [pa.float64()], + pa.float64(), + [-1.2, None, 1.2], + [-1.2, None, 1.2], + ), + ( + lambda x: x.is_null(), + [pa.float64()], + pa.bool_(), + [-1.2, None, 1.2], + [False, True, False], + ), + ], +) +def test_udf( + ctx, tmp_path, fn, input_types, output_type, input_values, expected_values +): + # write to disk + path = helpers.write_parquet( + tmp_path / "a.parquet", pa.array(input_values) + ) + ctx.register_parquet("t", path) + + func = udf( + fn, input_types, output_type, name="func", volatility="immutable" + ) + ctx.register_udf(func) + + batches = ctx.sql("SELECT func(a) AS tt FROM t").collect() + result = batches[0].column(0) + + assert result == pa.array(expected_values) + + +_null_mask = np.array([False, True, False]) + + +@pytest.mark.parametrize( + "arr", + [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + helpers.data_binary_other(), + helpers.data_date32(), + helpers.data_with_nans(), + # C data interface missing + pytest.param( + pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), + marks=pytest.mark.xfail, + ), + pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), + # Not writtable to parquet + pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), + ], +) +def test_simple_select(ctx, tmp_path, arr): + path = helpers.write_parquet(tmp_path / "a.parquet", arr) + ctx.register_parquet("t", path) + + batches = ctx.sql("SELECT a AS tt FROM t").collect() + result = batches[0].column(0) + + np.testing.assert_equal(result, arr) diff --git a/ballista-python/ballista/tests/test_udaf.py b/ballista-python/ballista/tests/test_udaf.py new file mode 100644 index 000000000..c2b29d199 --- /dev/null +++ b/ballista-python/ballista/tests/test_udaf.py @@ -0,0 +1,136 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import List + +import pyarrow as pa +import pyarrow.compute as pc +import pytest + +from datafusion import Accumulator, SessionContext, column, udaf + + +class Summarize(Accumulator): + """ + Interface of a user-defined accumulation. + """ + + def __init__(self): + self._sum = pa.scalar(0.0) + + def state(self) -> List[pa.Scalar]: + return [self._sum] + + def update(self, values: pa.Array) -> None: + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) + + def merge(self, states: pa.Array) -> None: + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py()) + + def evaluate(self) -> pa.Scalar: + return self._sum + + +class NotSubclassOfAccumulator: + pass + + +class MissingMethods(Accumulator): + def __init__(self): + self._sum = pa.scalar(0) + + def state(self) -> List[pa.Scalar]: + return [self._sum] + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +@pytest.mark.skip(reason="df.collect() will hang, need more investigations") +def test_errors(df): + with pytest.raises(TypeError): + udaf( + NotSubclassOfAccumulator, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + accum = udaf( + MissingMethods, + pa.int64(), + pa.int64(), + [pa.int64()], + volatility="immutable", + ) + df = df.aggregate([], [accum(column("a"))]) + + msg = ( + "Can't instantiate abstract class MissingMethods with abstract " + "methods evaluate, merge, update" + ) + with pytest.raises(Exception, match=msg): + df.collect() + + +def test_aggregate(df): + summarize = udaf( + Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + df = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + +def test_group_by(df): + summarize = udaf( + Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + df = df.aggregate([column("b")], [summarize(column("a"))]) + + batches = df.collect() + + arrays = [batch.column(1) for batch in batches] + joined = pa.concat_arrays(arrays) + assert joined == pa.array([1.0 + 2.0, 3.0]) diff --git a/ballista-python/dev/create_license.py b/ballista-python/dev/create_license.py new file mode 100644 index 000000000..2a67cb8fd --- /dev/null +++ b/ballista-python/dev/create_license.py @@ -0,0 +1,252 @@ +#!/usr/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# This file is a mirror of https://github.com/apache/arrow-datafusion/blob/master/dev/create_license.py + +import json +import subprocess + +subprocess.check_output(["cargo", "install", "cargo-license"]) +data = subprocess.check_output( + [ + "cargo", + "license", + "--avoid-build-deps", + "--avoid-dev-deps", + "--do-not-bundle", + "--json", + ] +) +data = json.loads(data) + +result = """ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +result += "\n------------------\n\n" +result += "This software is built and contains the following software:\n\n" +result += "(automatically generated via [cargo-license](https://crates.io/crates/cargo-license))\n\n" +for item in data: + license = item["license"] + name = item["name"] + version = item["version"] + repository = item["repository"] + result += "------------------\n\n" + result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n" + +with open("LICENSE.txt", "w") as f: + f.write(result) diff --git a/ballista-python/pyproject.toml b/ballista-python/pyproject.toml new file mode 100644 index 000000000..2d9d30e78 --- /dev/null +++ b/ballista-python/pyproject.toml @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
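create_license.py above shells out to cargo-license and formats its JSON output; each entry is assumed to carry the name, version, repository, and license fields that the final f-string interpolates. A small worked example of that transformation, using made-up crate metadata:

# illustrative entry in the shape `cargo license --json` produces
item = {
    "name": "example-crate",
    "version": "1.2.3",
    "repository": "https://example.org/example-crate",
    "license": "Apache-2.0",
}

# same formatting as the loop at the end of the script
section = "------------------\n\n"
section += (
    f"### {item['name']} {item['version']}\n"
    f"* source: [{item['repository']}]({item['repository']})\n"
    f"* license: {item['license']}\n\n"
)
print(section)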
+ +[build-system] +requires = ["maturin>=0.11,<0.13"] +build-backend = "maturin" + +[project] +name = "ballista" +description = "Build and run queries against data" +readme = "README.md" +license = {file = "LICENSE.txt"} +requires-python = ">=3.6" +keywords = ["ballista", "datafusion", "dataframe", "rust", "query-engine"] +classifier = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python", + "Programming Language :: Rust", +] +dependencies = [ + "datafusion>=0.5.2", + "pyarrow>=1", +] + +[project.urls] +documentation = "https://arrow.apache.org/ballista/python" +repository = "https://github.com/apache/arrow-ballista" + +[tool.isort] +profile = "black" + +[tool.maturin] +sdist-include = ["Cargo.lock"] diff --git a/ballista-python/requirements-310.txt b/ballista-python/requirements-310.txt new file mode 100644 index 000000000..30a2291c4 --- /dev/null +++ b/ballista-python/requirements-310.txt @@ -0,0 +1,213 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd + # via pytest +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + 
--hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 + # via -r requirements.in +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + 
--hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.22.3 \ + --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ + --hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ + --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ + --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ + --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ + --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ + --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ + --hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ + --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ + --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ + --hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ + --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ + --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ + --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ + --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 \ + --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ + --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ + --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ + --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ + --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa + # via + # -r requirements.in + # pyarrow +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via pytest +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + 
--hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via pytest +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + 
--hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # black + # maturin + # mypy + # pytest +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 + # via mypy diff --git a/ballista-python/requirements-37.txt b/ballista-python/requirements-37.txt new file mode 100644 index 000000000..b1776eed7 --- /dev/null +++ b/ballista-python/requirements-37.txt @@ -0,0 +1,268 @@ +# +# This file is autogenerated by pip-compile with python 3.7 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd + # via pytest +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + 
--hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +importlib-metadata==4.2.0 ; python_version < "3.8" \ + --hash=sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b \ + --hash=sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31 + # via + # -r requirements.in + # click + # flake8 + # pluggy + # pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 + # via -r requirements.in +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + 
--hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.21.6 \ + --hash=sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac \ + --hash=sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3 \ + --hash=sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6 \ + --hash=sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1 \ + --hash=sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a \ + --hash=sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b \ + --hash=sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470 \ + --hash=sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1 \ + --hash=sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab \ + --hash=sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46 \ + --hash=sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673 \ + --hash=sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7 \ + --hash=sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db \ + --hash=sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e \ + --hash=sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786 \ + --hash=sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552 \ + --hash=sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25 \ + --hash=sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6 \ + --hash=sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2 \ + --hash=sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a \ + --hash=sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf \ + --hash=sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f \ + --hash=sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c \ + --hash=sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4 \ + --hash=sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b \ + --hash=sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0 \ + 
--hash=sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3 \ + --hash=sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656 \ + --hash=sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0 \ + --hash=sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb \ + --hash=sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e + # via + # -r requirements.in + # pyarrow +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via pytest +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via pytest +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + 
--hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # black + # maturin + # mypy + # pytest +typed-ast==1.5.3 \ + --hash=sha256:20d5118e494478ef2d3a2702d964dae830aedd7b4d3b626d003eea526be18718 \ + --hash=sha256:27e46cdd01d6c3a0dd8f728b6a938a6751f7bd324817501c15fb056307f918c6 \ + --hash=sha256:27f25232e2dd0edfe1f019d6bfaaf11e86e657d9bdb7b0956db95f560cceb2b3 \ + --hash=sha256:3042bfc9ca118712c9809201f55355479cfcdc17449f9f8db5e744e9625c6805 \ + --hash=sha256:37e5349d1d5de2f4763d534ccb26809d1c24b180a477659a12c4bde9dd677d74 \ + --hash=sha256:4fff9fdcce59dc61ec1b317bdb319f8f4e6b69ebbe61193ae0a60c5f9333dc49 \ + --hash=sha256:542cd732351ba8235f20faa0fc7398946fe1a57f2cdb289e5497e1e7f48cfedb \ + --hash=sha256:5dc2c11ae59003d4a26dda637222d9ae924387f96acae9492df663843aefad55 \ + --hash=sha256:8831479695eadc8b5ffed06fdfb3e424adc37962a75925668deeb503f446c0a3 \ + --hash=sha256:8cdf91b0c466a6c43f36c1964772918a2c04cfa83df8001ff32a89e357f8eb06 \ + --hash=sha256:8e0b8528838ffd426fea8d18bde4c73bcb4167218998cc8b9ee0a0f2bfe678a6 \ + --hash=sha256:8ef1d96ad05a291f5c36895d86d1375c0ee70595b90f6bb5f5fdbee749b146db \ + --hash=sha256:9ad3b48cf2b487be140072fb86feff36801487d4abb7382bb1929aaac80638ea \ + --hash=sha256:9cc9e1457e1feb06b075c8ef8aeb046a28ec351b1958b42c7c31c989c841403a \ + --hash=sha256:9e237e74fd321a55c90eee9bc5d44be976979ad38a29bbd734148295c1ce7617 \ + --hash=sha256:c9f1a27592fac87daa4e3f16538713d705599b0a27dfe25518b80b6b017f0a6d \ + --hash=sha256:d64dabc6336ddc10373922a146fa2256043b3b43e61f28961caec2a5207c56d5 \ + --hash=sha256:e20d196815eeffb3d76b75223e8ffed124e65ee62097e4e73afb5fec6b993e7a \ + --hash=sha256:e34f9b9e61333ecb0f7d79c21c28aa5cd63bec15cb7e1310d7d3da6ce886bc9b \ + --hash=sha256:ed44e81517364cb5ba367e4f68fca01fba42a7a4690d40c07886586ac267d9b9 \ + --hash=sha256:ee852185964744987609b40aee1d2eb81502ae63ee8eef614558f96a56c1902d \ + --hash=sha256:f60d9de0d087454c91b3999a296d0c4558c1666771e3460621875021bf899af9 \ + 
--hash=sha256:f818c5b81966d4728fec14caa338e30a70dfc3da577984d38f97816c4b3071ec \ + --hash=sha256:fd5df1313915dbd70eaaa88c19030b441742e8b05e6103c631c83b75e0435ccc + # via + # black + # mypy +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 + # via + # black + # importlib-metadata + # mypy +zipp==3.8.0 \ + --hash=sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad \ + --hash=sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099 + # via importlib-metadata diff --git a/ballista-python/requirements.in b/ballista-python/requirements.in new file mode 100644 index 000000000..7ee6a48dc --- /dev/null +++ b/ballista-python/requirements.in @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +black +flake8 +isort +maturin +mypy +numpy +pyarrow +pytest +toml +importlib_metadata; python_version < "3.8" diff --git a/ballista-python/src/ballista_context.rs b/ballista-python/src/ballista_context.rs new file mode 100644 index 000000000..e3bf21923 --- /dev/null +++ b/ballista-python/src/ballista_context.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use std::path::PathBuf; + +use crate::errors::BallistaError; +use crate::utils::wait_for_future; + +use crate::dataframe::PyDataFrame; +use ballista::prelude::*; +use datafusion::arrow::datatypes::Schema; +use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions}; + +/// `PyBallistaContext` is able to plan and execute DataFusion plans. +/// It has a powerful optimizer, a physical planner for local execution, and a +/// multi-threaded execution engine to perform the execution. 
+#[pyclass(name = "BallistaContext", module = "ballista", subclass, unsendable)]
+pub(crate) struct PyBallistaContext {
+    ctx: BallistaContext,
+}
+
+#[pymethods]
+impl PyBallistaContext {
+    #[new]
+    #[args(port = "50050")]
+    fn new(py: Python, host: &str, port: u16) -> PyResult<Self> {
+        let config = BallistaConfig::builder()
+            .set("ballista.shuffle.partitions", "4")
+            .build()
+            .map_err(BallistaError::from)?;
+
+        let result = BallistaContext::remote(host, port, &config);
+        let ctx = wait_for_future(py, result).map_err(BallistaError::from)?;
+
+        Ok(PyBallistaContext { ctx })
+    }
+
+    /// Returns a PyDataFrame whose plan corresponds to the SQL statement.
+    fn sql(&mut self, query: &str, py: Python) -> PyResult<PyDataFrame> {
+        let ctx = &self.ctx;
+
+        let result = ctx.sql(query);
+        let df = wait_for_future(py, result).map_err(BallistaError::from)?;
+        Ok(PyDataFrame::new(df))
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    #[args(
+        schema = "None",
+        has_header = "true",
+        delimiter = "\",\"",
+        schema_infer_max_records = "1000",
+        file_extension = "\".csv\""
+    )]
+    fn register_csv(
+        &mut self,
+        name: &str,
+        path: PathBuf,
+        schema: Option<Schema>,
+        has_header: bool,
+        delimiter: &str,
+        schema_infer_max_records: usize,
+        file_extension: &str,
+        py: Python,
+    ) -> PyResult<()> {
+        let ctx = &self.ctx;
+
+        let path = path
+            .to_str()
+            .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?;
+
+        let delimiter = delimiter.as_bytes();
+        if delimiter.len() != 1 {
+            return Err(PyValueError::new_err(
+                "Delimiter must be a single character",
+            ));
+        }
+
+        let mut options = CsvReadOptions::new()
+            .has_header(has_header)
+            .delimiter(delimiter[0])
+            .schema_infer_max_records(schema_infer_max_records)
+            .file_extension(file_extension);
+        options.schema = schema.as_ref();
+
+        let result = ctx.register_csv(name, path, options);
+        wait_for_future(py, result).map_err(BallistaError::from)?;
+
+        Ok(())
+    }
+
+    fn register_avro(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> {
+        let ctx = &self.ctx;
+
+        let result = ctx.register_avro(name, path, AvroReadOptions::default());
+        wait_for_future(py, result).map_err(BallistaError::from)?;
+
+        Ok(())
+    }
+
+    fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> {
+        let ctx = &self.ctx;
+
+        let result = ctx.register_parquet(name, path, ParquetReadOptions::default());
+        wait_for_future(py, result).map_err(BallistaError::from)?;
+
+        Ok(())
+    }
+}
diff --git a/ballista-python/src/dataframe.rs b/ballista-python/src/dataframe.rs
new file mode 100644
index 000000000..cff1733fb
--- /dev/null
+++ b/ballista-python/src/dataframe.rs
@@ -0,0 +1,163 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
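register_csv above is a thin wrapper around DataFusion's CsvReadOptions: header handling, a strictly single-character delimiter, a cap on schema-inference rows, an optional explicit pyarrow schema, and a file-extension filter. A sketch of how those options look from the Python side, assuming the BallistaContext class is exported from the ballista package as the pyclass name suggests, and with a hypothetical scheduler endpoint and file path:

import pyarrow as pa
from ballista import BallistaContext  # assumed re-export of the pyclass

ctx = BallistaContext("localhost", 50050)  # hypothetical scheduler host/port

schema = pa.schema(
    [
        ("id", pa.int64()),
        ("name", pa.string()),
        ("score", pa.float32()),
    ]
)

# defaults mirror the #[args(...)] attribute: has_header=True, delimiter=",",
# schema_infer_max_records=1000, file_extension=".csv", schema=None
ctx.register_csv(
    "scores",
    "/data/scores.csv",  # illustrative path
    schema=schema,       # skip inference, use this schema as-is
    has_header=True,
    delimiter=";",       # anything longer than one character raises ValueError
)

print(ctx.sql("SELECT name, score FROM scores WHERE score > 0.5").collect())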
+
+use crate::errors::DataFusionError;
+use crate::expression::PyExpr;
+use crate::utils::wait_for_future;
+use datafusion::arrow::datatypes::Schema;
+use datafusion::arrow::pyarrow::PyArrowConvert;
+use datafusion::arrow::util::pretty;
+use datafusion::dataframe::DataFrame;
+use datafusion::logical_plan::JoinType;
+use pyo3::exceptions::PyTypeError;
+use pyo3::prelude::*;
+use pyo3::types::PyTuple;
+use std::sync::Arc;
+
+/// A PyDataFrame is a representation of a logical plan and an API to compose statements.
+/// Use it to build a plan and `.collect()` to execute the plan and collect the result.
+/// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment.
+#[pyclass(name = "DataFrame", module = "ballista", subclass)]
+#[derive(Clone)]
+pub(crate) struct PyDataFrame {
+    df: Arc<DataFrame>,
+}
+
+impl PyDataFrame {
+    /// creates a new PyDataFrame
+    pub fn new(df: Arc<DataFrame>) -> Self {
+        Self { df }
+    }
+}
+
+#[pymethods]
+impl PyDataFrame {
+    fn __getitem__(&self, key: PyObject) -> PyResult<Self> {
+        Python::with_gil(|py| {
+            if let Ok(key) = key.extract::<&str>(py) {
+                self.select_columns(vec![key])
+            } else if let Ok(tuple) = key.extract::<&PyTuple>(py) {
+                let keys = tuple
+                    .iter()
+                    .map(|item| item.extract::<&str>())
+                    .collect::<PyResult<Vec<&str>>>()?;
+                self.select_columns(keys)
+            } else if let Ok(keys) = key.extract::<Vec<&str>>(py) {
+                self.select_columns(keys)
+            } else {
+                let message = "DataFrame can only be indexed by string index or indices";
+                Err(PyTypeError::new_err(message))
+            }
+        })
+    }
+
+    /// Returns the schema from the logical plan
+    fn schema(&self) -> Schema {
+        self.df.schema().into()
+    }
+
+    #[args(args = "*")]
+    fn select_columns(&self, args: Vec<&str>) -> PyResult<Self> {
+        let df = self.df.select_columns(&args)?;
+        Ok(Self::new(df))
+    }
+
+    #[args(args = "*")]
+    fn select(&self, args: Vec<PyExpr>) -> PyResult<Self> {
+        let expr = args.into_iter().map(|e| e.into()).collect();
+        let df = self.df.select(expr)?;
+        Ok(Self::new(df))
+    }
+
+    fn filter(&self, predicate: PyExpr) -> PyResult<Self> {
+        let df = self.df.filter(predicate.into())?;
+        Ok(Self::new(df))
+    }
+
+    fn aggregate(&self, group_by: Vec<PyExpr>, aggs: Vec<PyExpr>) -> PyResult<Self> {
+        let group_by = group_by.into_iter().map(|e| e.into()).collect();
+        let aggs = aggs.into_iter().map(|e| e.into()).collect();
+        let df = self.df.aggregate(group_by, aggs)?;
+        Ok(Self::new(df))
+    }
+
+    #[args(exprs = "*")]
+    fn sort(&self, exprs: Vec<PyExpr>) -> PyResult<Self> {
+        let exprs = exprs.into_iter().map(|e| e.into()).collect();
+        let df = self.df.sort(exprs)?;
+        Ok(Self::new(df))
+    }
+
+    fn limit(&self, count: usize) -> PyResult<Self> {
+        let df = self.df.limit(count)?;
+        Ok(Self::new(df))
+    }
+
+    /// Executes the plan, returning a list of `RecordBatch`es.
+    /// Unless some order is specified in the plan, there is no
+    /// guarantee of the order of the result.
+    fn collect(&self, py: Python) -> PyResult<Vec<PyObject>> {
+        let batches = wait_for_future(py, self.df.collect())?;
+        // cannot use PyResult<Vec<RecordBatch>> return type due to
+        // https://github.com/PyO3/pyo3/issues/1813
+        batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect()
+    }
+
+    /// Print the result, 20 lines by default
+    #[args(num = "20")]
+    fn show(&self, py: Python, num: usize) -> PyResult<()> {
+        let df = self.df.limit(num)?;
+        let batches = wait_for_future(py, df.collect())?;
+        Ok(pretty::print_batches(&batches)?)
+ } + + fn join( + &self, + right: PyDataFrame, + join_keys: (Vec<&str>, Vec<&str>), + how: &str, + ) -> PyResult { + let join_type = match how { + "inner" => JoinType::Inner, + "left" => JoinType::Left, + "right" => JoinType::Right, + "full" => JoinType::Full, + "semi" => JoinType::Semi, + "anti" => JoinType::Anti, + how => { + return Err(DataFusionError::Common(format!( + "The join type {} does not exist or is not implemented", + how + )) + .into()) + } + }; + + let df = self + .df + .join(right.df, join_type, &join_keys.0, &join_keys.1, None)?; + Ok(Self::new(df)) + } + + /// Print the query plan + #[args(verbose = false, analyze = false)] + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { + let df = self.df.explain(verbose, analyze)?; + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) + } +} diff --git a/ballista-python/src/errors.rs b/ballista-python/src/errors.rs new file mode 100644 index 000000000..7aecbed90 --- /dev/null +++ b/ballista-python/src/errors.rs @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use core::fmt; + +use ballista::prelude::BallistaError as InnerBallistaError; +use datafusion::arrow::error::ArrowError; +use datafusion::error::DataFusionError as InnerDataFusionError; +use pyo3::{exceptions::PyException, PyErr}; + +#[derive(Debug)] +pub enum DataFusionError { + ExecutionError(InnerDataFusionError), + ArrowError(ArrowError), + Common(String), +} + +impl fmt::Display for DataFusionError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e), + DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), + DataFusionError::Common(e) => write!(f, "{}", e), + } + } +} + +impl From for DataFusionError { + fn from(err: ArrowError) -> DataFusionError { + DataFusionError::ArrowError(err) + } +} + +impl From for DataFusionError { + fn from(err: InnerDataFusionError) -> DataFusionError { + DataFusionError::ExecutionError(err) + } +} + +impl From for PyErr { + fn from(err: DataFusionError) -> PyErr { + PyException::new_err(err.to_string()) + } +} + +impl From for BallistaError { + fn from(err: InnerDataFusionError) -> BallistaError { + BallistaError::DataFusionExecutionError(err) + } +} + +impl From for BallistaError { + fn from(err: InnerBallistaError) -> BallistaError { + BallistaError::ExecutionError(err) + } +} + +impl fmt::Display for BallistaError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BallistaError::DataFusionExecutionError(e) => write!(f, "Datafusion error: {:?}", e), + BallistaError::ExecutionError(e) => write!(f, "Ballista error: {:?}", e), + BallistaError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), + BallistaError::Common(e) => write!(f, "{}", e), + } + } +} + +#[derive(Debug)] +pub enum BallistaError { + DataFusionExecutionError(InnerDataFusionError), + ExecutionError(InnerBallistaError), + ArrowError(ArrowError), + Common(String), +} + +impl From for PyErr { + fn from(err: BallistaError) -> PyErr { + PyException::new_err(err.to_string()) + } +} diff --git a/ballista-python/src/expression.rs b/ballista-python/src/expression.rs new file mode 100644 index 000000000..b3275ccf0 --- /dev/null +++ b/ballista-python/src/expression.rs @@ -0,0 +1,137 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use pyo3::{basic::CompareOp, prelude::*}; +use std::convert::{From, Into}; + +use datafusion::arrow::datatypes::DataType; +use datafusion::logical_plan::{col, lit, Expr}; + +use datafusion::scalar::ScalarValue; + +/// An PyExpr that can be used on a DataFrame +#[pyclass(name = "Expression", module = "ballista", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct PyExpr { + pub(crate) expr: Expr, +} + +impl From for Expr { + fn from(expr: PyExpr) -> Expr { + expr.expr + } +} + +impl From for PyExpr { + fn from(expr: Expr) -> PyExpr { + PyExpr { expr } + } +} + +#[pymethods] +impl PyExpr { + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { + let expr = match op { + CompareOp::Lt => self.expr.clone().lt(other.expr), + CompareOp::Le => self.expr.clone().lt_eq(other.expr), + CompareOp::Eq => self.expr.clone().eq(other.expr), + CompareOp::Ne => self.expr.clone().not_eq(other.expr), + CompareOp::Gt => self.expr.clone().gt(other.expr), + CompareOp::Ge => self.expr.clone().gt_eq(other.expr), + }; + expr.into() + } + + fn __str__(&self) -> PyResult { + Ok(format!("{}", self.expr)) + } + + fn __add__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() + rhs.expr).into()) + } + + fn __sub__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() - rhs.expr).into()) + } + + fn __truediv__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() / rhs.expr).into()) + } + + fn __mul__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() * rhs.expr).into()) + } + + fn __mod__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().modulus(rhs.expr).into()) + } + + fn __and__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().and(rhs.expr).into()) + } + + fn __or__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().or(rhs.expr).into()) + } + + fn __invert__(&self) -> PyResult { + Ok(self.expr.clone().not().into()) + } + + fn __getitem__(&self, key: &str) -> PyResult { + Ok(Expr::GetIndexedField { + expr: Box::new(self.expr.clone()), + key: ScalarValue::Utf8(Some(key.to_string())), + } + .into()) + } + + #[staticmethod] + pub fn literal(value: ScalarValue) -> PyExpr { + lit(value).into() + } + + #[staticmethod] + pub fn column(value: &str) -> PyExpr { + col(value).into() + } + + /// assign a name to the PyExpr + pub fn alias(&self, name: &str) -> PyExpr { + self.expr.clone().alias(name).into() + } + + /// Create a sort PyExpr from an existing PyExpr. + #[args(ascending = true, nulls_first = true)] + pub fn sort(&self, ascending: bool, nulls_first: bool) -> PyExpr { + self.expr.clone().sort(ascending, nulls_first).into() + } + + pub fn is_null(&self) -> PyExpr { + self.expr.clone().is_null().into() + } + + pub fn cast(&self, to: DataType) -> PyExpr { + // self.expr.cast_to() requires DFSchema to validate that the cast + // is supported, omit that for now + let expr = Expr::Cast { + expr: Box::new(self.expr.clone()), + data_type: to, + }; + expr.into() + } +} diff --git a/ballista-python/src/functions.rs b/ballista-python/src/functions.rs new file mode 100644 index 000000000..1a2c4ed73 --- /dev/null +++ b/ballista-python/src/functions.rs @@ -0,0 +1,338 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::{prelude::*, wrap_pyfunction}; + +use datafusion::logical_plan; +use datafusion::physical_plan::aggregates::AggregateFunction; +use datafusion_expr::BuiltinScalarFunction; + +use crate::errors; +use crate::expression::PyExpr; + +#[pyfunction] +fn array(value: Vec) -> PyExpr { + PyExpr { + expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::>()), + } +} + +#[pyfunction] +fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { + logical_plan::in_list( + expr.expr, + value.into_iter().map(|x| x.expr).collect::>(), + negated, + ) + .into() +} + +/// Current date and time +#[pyfunction] +fn now() -> PyExpr { + PyExpr { + // here lit(0) is a stub for conform to arity + expr: logical_plan::now(logical_plan::lit(0)), + } +} + +/// Returns a random value in the range 0.0 <= x < 1.0 +#[pyfunction] +fn random() -> PyExpr { + PyExpr { + expr: logical_plan::random(), + } +} + +/// Computes a binary hash of the given data. type is the algorithm to use. +/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. +#[pyfunction(value, method)] +fn digest(value: PyExpr, method: PyExpr) -> PyExpr { + PyExpr { + expr: logical_plan::digest(value.expr, method.expr), + } +} + +/// Concatenates the text representations of all the arguments. +/// NULL arguments are ignored. +#[pyfunction(args = "*")] +fn concat(args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat(&args).into()) +} + +/// Concatenates all but the first argument, with separators. +/// The first argument is used as the separator string, and should not be NULL. +/// Other NULL arguments are ignored. +#[pyfunction(sep, args = "*")] +fn concat_ws(sep: String, args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat_ws(sep, &args).into()) +} + +/// Creates a new Sort expression +#[pyfunction] +fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Sort { + expr: Box::new(expr.expr), + asc: asc.unwrap_or(true), + nulls_first: nulls_first.unwrap_or(true), + }, + }) +} + +/// Creates a new Alias expression +#[pyfunction] +fn alias(expr: PyExpr, name: &str) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)), + }) +} + +/// Creates a new Window function expression +#[pyfunction] +fn window( + name: &str, + args: Vec, + partition_by: Option>, + order_by: Option>, +) -> PyResult { + use std::str::FromStr; + let fun = datafusion_expr::window_function::WindowFunction::from_str(name) + .map_err(|e| -> errors::DataFusionError { e.into() })?; + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::WindowFunction { + fun, + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or_default() + .into_iter() + .map(|x| x.expr) + .collect::>(), + window_frame: None, + }, + }) +} + +macro_rules! 
scalar_function { + ($NAME: ident, $FUNC: ident) => { + scalar_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*")] + fn $NAME(args: Vec) -> PyExpr { + let expr = logical_plan::Expr::ScalarFunction { + fun: BuiltinScalarFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + }; + expr.into() + } + }; +} + +macro_rules! aggregate_function { + ($NAME: ident, $FUNC: ident) => { + aggregate_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*", distinct = "false")] + fn $NAME(args: Vec, distinct: bool) -> PyExpr { + let expr = logical_plan::Expr::AggregateFunction { + fun: AggregateFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + distinct, + }; + expr.into() + } + }; +} + +scalar_function!(abs, Abs); +scalar_function!(acos, Acos); +scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); +scalar_function!(asin, Asin); +scalar_function!(atan, Atan); +scalar_function!( + bit_length, + BitLength, + "Returns number of bits in the string (8 times the octet_length)." +); +scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); +scalar_function!(ceil, Ceil); +scalar_function!( + character_length, + CharacterLength, + "Returns number of characters in the string." +); +scalar_function!(chr, Chr, "Returns the character with the given code."); +scalar_function!(cos, Cos); +scalar_function!(exp, Exp); +scalar_function!(floor, Floor); +scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); +scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); +scalar_function!(ln, Ln); +scalar_function!(log10, Log10); +scalar_function!(log2, Log2); +scalar_function!(lower, Lower, "Converts the string to all lower case"); +scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)."); +scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string."); +scalar_function!( + md5, + MD5, + "Computes the MD5 hash of the argument, with the result written in hexadecimal." +); +scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); +scalar_function!(regexp_match, RegexpMatch); +scalar_function!( + regexp_replace, + RegexpReplace, + "Replaces substring(s) matching a POSIX regular expression" +); +scalar_function!( + repeat, + Repeat, + "Repeats string the specified number of times." +); +scalar_function!( + replace, + Replace, + "Replaces all occurrences in string of substring from with substring to." +); +scalar_function!( + reverse, + Reverse, + "Reverses the order of the characters in the string." 
+); +scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); +scalar_function!(round, Round); +scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated."); +scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string."); +scalar_function!(sha224, SHA224); +scalar_function!(sha256, SHA256); +scalar_function!(sha384, SHA384); +scalar_function!(sha512, SHA512); +scalar_function!(signum, Signum); +scalar_function!(sin, Sin); +scalar_function!( + split_part, + SplitPart, + "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." +); +scalar_function!(sqrt, Sqrt); +scalar_function!( + starts_with, + StartsWith, + "Returns true if string starts with prefix." +); +scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); +scalar_function!(substr, Substr); +scalar_function!(tan, Tan); +scalar_function!( + to_hex, + ToHex, + "Converts the number to its equivalent hexadecimal representation." +); +scalar_function!(to_timestamp, ToTimestamp); +scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted."); +scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string."); +scalar_function!(trunc, Trunc); +scalar_function!(upper, Upper, "Converts the string to all upper case."); + +aggregate_function!(avg, Avg); +aggregate_function!(count, Count); +aggregate_function!(max, Max); +aggregate_function!(min, Min); +aggregate_function!(sum, Sum); +aggregate_function!(approx_distinct, ApproxDistinct); + +pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(abs))?; + m.add_wrapped(wrap_pyfunction!(acos))?; + m.add_wrapped(wrap_pyfunction!(approx_distinct))?; + m.add_wrapped(wrap_pyfunction!(alias))?; + m.add_wrapped(wrap_pyfunction!(array))?; + m.add_wrapped(wrap_pyfunction!(ascii))?; + m.add_wrapped(wrap_pyfunction!(asin))?; + m.add_wrapped(wrap_pyfunction!(atan))?; + m.add_wrapped(wrap_pyfunction!(avg))?; + m.add_wrapped(wrap_pyfunction!(bit_length))?; + m.add_wrapped(wrap_pyfunction!(btrim))?; + m.add_wrapped(wrap_pyfunction!(ceil))?; + m.add_wrapped(wrap_pyfunction!(character_length))?; + m.add_wrapped(wrap_pyfunction!(chr))?; + m.add_wrapped(wrap_pyfunction!(concat_ws))?; + m.add_wrapped(wrap_pyfunction!(concat))?; + m.add_wrapped(wrap_pyfunction!(cos))?; + m.add_wrapped(wrap_pyfunction!(count))?; + m.add_wrapped(wrap_pyfunction!(digest))?; + m.add_wrapped(wrap_pyfunction!(exp))?; + m.add_wrapped(wrap_pyfunction!(floor))?; + m.add_wrapped(wrap_pyfunction!(in_list))?; + m.add_wrapped(wrap_pyfunction!(initcap))?; + m.add_wrapped(wrap_pyfunction!(left))?; + m.add_wrapped(wrap_pyfunction!(ln))?; + m.add_wrapped(wrap_pyfunction!(log10))?; + m.add_wrapped(wrap_pyfunction!(log2))?; + m.add_wrapped(wrap_pyfunction!(lower))?; + m.add_wrapped(wrap_pyfunction!(lpad))?; + 
m.add_wrapped(wrap_pyfunction!(ltrim))?; + m.add_wrapped(wrap_pyfunction!(max))?; + m.add_wrapped(wrap_pyfunction!(md5))?; + m.add_wrapped(wrap_pyfunction!(min))?; + m.add_wrapped(wrap_pyfunction!(now))?; + m.add_wrapped(wrap_pyfunction!(octet_length))?; + m.add_wrapped(wrap_pyfunction!(order_by))?; + m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_match))?; + m.add_wrapped(wrap_pyfunction!(regexp_replace))?; + m.add_wrapped(wrap_pyfunction!(repeat))?; + m.add_wrapped(wrap_pyfunction!(replace))?; + m.add_wrapped(wrap_pyfunction!(reverse))?; + m.add_wrapped(wrap_pyfunction!(right))?; + m.add_wrapped(wrap_pyfunction!(round))?; + m.add_wrapped(wrap_pyfunction!(rpad))?; + m.add_wrapped(wrap_pyfunction!(rtrim))?; + m.add_wrapped(wrap_pyfunction!(sha224))?; + m.add_wrapped(wrap_pyfunction!(sha256))?; + m.add_wrapped(wrap_pyfunction!(sha384))?; + m.add_wrapped(wrap_pyfunction!(sha512))?; + m.add_wrapped(wrap_pyfunction!(signum))?; + m.add_wrapped(wrap_pyfunction!(sin))?; + m.add_wrapped(wrap_pyfunction!(split_part))?; + m.add_wrapped(wrap_pyfunction!(sqrt))?; + m.add_wrapped(wrap_pyfunction!(starts_with))?; + m.add_wrapped(wrap_pyfunction!(strpos))?; + m.add_wrapped(wrap_pyfunction!(substr))?; + m.add_wrapped(wrap_pyfunction!(sum))?; + m.add_wrapped(wrap_pyfunction!(tan))?; + m.add_wrapped(wrap_pyfunction!(to_hex))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp))?; + m.add_wrapped(wrap_pyfunction!(translate))?; + m.add_wrapped(wrap_pyfunction!(trim))?; + m.add_wrapped(wrap_pyfunction!(trunc))?; + m.add_wrapped(wrap_pyfunction!(upper))?; + m.add_wrapped(wrap_pyfunction!(window))?; + Ok(()) +} diff --git a/ballista-python/src/lib.rs b/ballista-python/src/lib.rs new file mode 100644 index 000000000..70400f3f9 --- /dev/null +++ b/ballista-python/src/lib.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use mimalloc::MiMalloc; +use pyo3::prelude::*; + +mod ballista_context; +mod dataframe; +pub mod errors; +mod expression; +mod functions; +pub mod utils; + +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; + +/// Low-level DataFusion internal package. +/// +/// The higher-level public API is defined in pure python files under the +/// datafusion directory. 
+#[pymodule] +fn _internal(py: Python, m: &PyModule) -> PyResult<()> { + // Register the python classes + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + + // Register the functions as a submodule + let funcs = PyModule::new(py, "functions")?; + functions::init_module(funcs)?; + m.add_submodule(funcs)?; + + Ok(()) +} diff --git a/ballista-python/src/utils.rs b/ballista-python/src/utils.rs new file mode 100644 index 000000000..795058bc9 --- /dev/null +++ b/ballista-python/src/utils.rs @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::prelude::*; +use std::future::Future; +use tokio::runtime::Runtime; + +/// Utility to collect rust futures with GIL released +pub fn wait_for_future(py: Python, f: F) -> F::Output +where + F: Send, + F::Output: Send, +{ + let rt = Runtime::new().unwrap(); + py.allow_threads(|| rt.block_on(f)) +} From 29eed4a220cabfb556f2930542c47d80f9410a8d Mon Sep 17 00:00:00 2001 From: Remco Verhoef Date: Sun, 5 Jun 2022 09:40:05 +0200 Subject: [PATCH 2/5] fmt cargo toml --- ballista-python/Cargo.toml | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ballista-python/Cargo.toml b/ballista-python/Cargo.toml index c9edb2d6f..0c3a42c3d 100644 --- a/ballista-python/Cargo.toml +++ b/ballista-python/Cargo.toml @@ -27,25 +27,25 @@ license = "Apache-2.0" edition = "2021" rust-version = "1.57" +[package.metadata.maturin] +name = "ballista._internal" + [dependencies] -tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } -rand = "0.7" -pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } -datafusion = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features=["pyarrow"]} -datafusion-expr = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710"} -datafusion-common = {git="https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710"} -uuid = { version = "0.8", features = ["v4"] } +ballista = { git = "https://github.com/apache/arrow-ballista", rev = "9e78b8ecd55cc3e9d46387c87098c4e6625294eb" } +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features = ["pyarrow"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } mimalloc = { version = "*", default-features = false } -ballista = { git = "https://github.com/apache/arrow-ballista", rev = 
"9e78b8ecd55cc3e9d46387c87098c4e6625294eb"} +pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } +rand = "0.7" +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } +uuid = { version = "0.8", features = ["v4"] } [lib] -name = "ballista_python" crate-type = ["cdylib", "rlib"] - -[package.metadata.maturin] -name = "ballista._internal" +name = "ballista_python" [profile.release] -lto = true codegen-units = 1 +lto = true From 959e033d36bcaff95efc930b60f349f6e47700ea Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 8 Jun 2022 10:20:30 -0600 Subject: [PATCH 3/5] Revert "remove python (#1518)" (#61) * Revert "remove python (#1518)" This reverts commit bac97fa04ad8e9def814507b845d64038687f4a2. * fix cargo.toml lint issues and exclude from workspace * use original datafusion dependency * fix build * lint --- .github/workflows/python_build.yml | 131 ++ .github/workflows/python_test.yaml | 62 + Cargo.toml | 2 +- python/.cargo/config | 22 + python/.dockerignore | 19 + python/.gitignore | 20 + python/CHANGELOG.md | 129 ++ python/Cargo.lock | 1464 +++++++++++++++++++ python/Cargo.toml | 46 + python/LICENSE.txt | 202 +++ python/README.md | 159 +- python/datafusion/__init__.py | 111 ++ python/datafusion/functions.py | 23 + python/datafusion/tests/__init__.py | 16 + python/datafusion/tests/generic.py | 87 ++ python/datafusion/tests/test_aggregation.py | 48 + python/datafusion/tests/test_catalog.py | 72 + python/datafusion/tests/test_context.py | 63 + python/datafusion/tests/test_dataframe.py | 181 +++ python/datafusion/tests/test_functions.py | 219 +++ python/datafusion/tests/test_imports.py | 65 + python/datafusion/tests/test_sql.py | 250 ++++ python/datafusion/tests/test_udaf.py | 135 ++ python/pyproject.toml | 55 + python/requirements-37.txt | 329 +++++ python/requirements.in | 27 + python/requirements.txt | 282 ++++ python/rust-toolchain | 1 + python/src/catalog.rs | 123 ++ python/src/context.rs | 173 +++ python/src/dataframe.rs | 130 ++ python/src/errors.rs | 57 + python/src/expression.rs | 147 ++ python/src/functions.rs | 343 +++++ python/src/lib.rs | 52 + python/src/udaf.rs | 153 ++ python/src/udf.rs | 98 ++ python/src/utils.rs | 50 + 38 files changed, 5543 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/python_build.yml create mode 100644 .github/workflows/python_test.yaml create mode 100644 python/.cargo/config create mode 100644 python/.dockerignore create mode 100644 python/.gitignore create mode 100644 python/CHANGELOG.md create mode 100644 python/Cargo.lock create mode 100644 python/Cargo.toml create mode 100644 python/LICENSE.txt create mode 100644 python/datafusion/__init__.py create mode 100644 python/datafusion/functions.py create mode 100644 python/datafusion/tests/__init__.py create mode 100644 python/datafusion/tests/generic.py create mode 100644 python/datafusion/tests/test_aggregation.py create mode 100644 python/datafusion/tests/test_catalog.py create mode 100644 python/datafusion/tests/test_context.py create mode 100644 python/datafusion/tests/test_dataframe.py create mode 100644 python/datafusion/tests/test_functions.py create mode 100644 python/datafusion/tests/test_imports.py create mode 100644 python/datafusion/tests/test_sql.py create mode 100644 python/datafusion/tests/test_udaf.py create mode 100644 python/pyproject.toml create mode 100644 python/requirements-37.txt create mode 100644 python/requirements.in create mode 100644 python/requirements.txt create mode 100644 python/rust-toolchain 
create mode 100644 python/src/catalog.rs create mode 100644 python/src/context.rs create mode 100644 python/src/dataframe.rs create mode 100644 python/src/errors.rs create mode 100644 python/src/expression.rs create mode 100644 python/src/functions.rs create mode 100644 python/src/lib.rs create mode 100644 python/src/udaf.rs create mode 100644 python/src/udf.rs create mode 100644 python/src/utils.rs diff --git a/.github/workflows/python_build.yml b/.github/workflows/python_build.yml new file mode 100644 index 000000000..6e54d1296 --- /dev/null +++ b/.github/workflows/python_build.yml @@ -0,0 +1,131 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Python Release Build +on: + push: + tags: + - "*-rc*" + +defaults: + run: + working-directory: ./python + +jobs: + generate-license: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + - name: Generate license file + run: python ../dev/create_license.py + - uses: actions/upload-artifact@v2 + with: + name: python-wheel-license + path: python/LICENSE.txt + + build-python-mac-win: + needs: [generate-license] + name: Mac/Win + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.10"] + os: [macos-latest, windows-latest] + steps: + - uses: actions/checkout@v2 + + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions-rs/toolchain@v1 + with: + toolchain: nightly-2021-10-23 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install maturin==0.11.5 + + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: python + + - name: Build Python package + run: maturin build --release --no-sdist --strip + + - name: List Windows wheels + if: matrix.os == 'windows-latest' + run: dir target\wheels\ + + - name: List Mac wheels + if: matrix.os != 'windows-latest' + run: find target/wheels/ + + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: python/target/wheels/* + + build-manylinux: + needs: [generate-license] + name: Manylinux + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: rm LICENSE.txt + - name: Download LICENSE.txt + uses: actions/download-artifact@v2 + with: + name: python-wheel-license + path: python + - run: cat LICENSE.txt + - name: Build wheels + run: | + export RUSTFLAGS='-C target-cpu=skylake' + docker run --rm -v $(pwd)/..:/io \ + --workdir /io/python \ + konstin2/maturin:v0.11.2 \ + build --release --manylinux 2010 + - name: Archive wheels + uses: actions/upload-artifact@v2 + with: + name: dist + path: python/target/wheels/* + + # NOTE: 
PyPI publish needs to be done manually for now after release passed the vote + # release: + # name: Publish in PyPI + # needs: [build-manylinux, build-python-mac-win] + # runs-on: ubuntu-latest + # steps: + # - uses: actions/download-artifact@v2 + # - name: Publish to PyPI + # uses: pypa/gh-action-pypi-publish@master + # with: + # user: __token__ + # password: ${{ secrets.pypi_password }} diff --git a/.github/workflows/python_test.yaml b/.github/workflows/python_test.yaml new file mode 100644 index 000000000..01a36af87 --- /dev/null +++ b/.github/workflows/python_test.yaml @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Python test +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Rust toolchain + run: | + rustup toolchain install nightly-2021-10-23 + rustup default nightly-2021-10-23 + rustup component add rustfmt + - name: Cache Cargo + uses: actions/cache@v2 + with: + path: /home/runner/.cargo + key: cargo-maturin-cache- + - name: Cache Rust dependencies + uses: actions/cache@v2 + with: + path: /home/runner/target + key: target-maturin-cache- + - uses: actions/setup-python@v2 + with: + python-version: "3.10" + - name: Create Virtualenv + run: | + python -m venv venv + source venv/bin/activate + pip install -r python/requirements.txt + - name: Run Linters + run: | + source venv/bin/activate + flake8 python --ignore=E501 + black --line-length 79 --diff --check python + - name: Run tests + run: | + source venv/bin/activate + cd python + maturin develop + RUST_BACKTRACE=1 pytest -v . + env: + CARGO_HOME: "/home/runner/.cargo" + CARGO_TARGET_DIR: "/home/runner/target" diff --git a/Cargo.toml b/Cargo.toml index 78076d482..9b9e79324 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ members = [ "ballista/rust/scheduler", "examples", ] -exclude = ["ballista-cli"] +exclude = ["ballista-cli", "python"] # cargo build --profile release-lto [profile.release-lto] diff --git a/python/.cargo/config b/python/.cargo/config new file mode 100644 index 000000000..0b24f30cf --- /dev/null +++ b/python/.cargo/config @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[target.x86_64-apple-darwin] +rustflags = [ + "-C", "link-arg=-undefined", + "-C", "link-arg=dynamic_lookup", +] diff --git a/python/.dockerignore b/python/.dockerignore new file mode 100644 index 000000000..08c131c2e --- /dev/null +++ b/python/.dockerignore @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +target +venv diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 000000000..586db7c4a --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +/target +venv +.venv diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md new file mode 100644 index 000000000..a07cb003c --- /dev/null +++ b/python/CHANGELOG.md @@ -0,0 +1,129 @@ + + +# Changelog + +## [python-0.4.0](https://github.com/apache/arrow-datafusion/tree/python-0.4.0) (2021-11-13) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/python-0.3.0...python-0.4.0) + +**Breaking changes:** + +- Add function volatility to Signature [\#1071](https://github.com/apache/arrow-datafusion/pull/1071) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) +- Make TableProvider.scan\(\) and PhysicalPlanner::create\_physical\_plan\(\) async [\#1013](https://github.com/apache/arrow-datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) +- Reorganize table providers by table format [\#1010](https://github.com/apache/arrow-datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) + +**Implemented enhancements:** + +- Build abi3 wheels for python binding [\#921](https://github.com/apache/arrow-datafusion/issues/921) +- Release documentation for python binding [\#837](https://github.com/apache/arrow-datafusion/issues/837) +- use arrow 6.1.0 [\#1255](https://github.com/apache/arrow-datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) +- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/arrow-datafusion/pull/1152) ([Jimexist](https://github.com/Jimexist)) +- add python binding for `approx_distinct` aggregate function [\#1134](https://github.com/apache/arrow-datafusion/pull/1134) ([Jimexist](https://github.com/Jimexist)) +- refactor datafusion python `lit` function to allow different types [\#1130](https://github.com/apache/arrow-datafusion/pull/1130) ([Jimexist](https://github.com/Jimexist)) +- \[python\] add digest python function [\#1127](https://github.com/apache/arrow-datafusion/pull/1127) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/arrow-datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) +- \[crypto\] add blake2b and blake2s functions [\#1081](https://github.com/apache/arrow-datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) +- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/arrow-datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) +- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/arrow-datafusion/pull/984) ([alamb](https://github.com/alamb)) +- \[Python\] Add support to perform sql query on in-memory datasource. 
[\#981](https://github.com/apache/arrow-datafusion/pull/981) ([mmuru](https://github.com/mmuru)) +- \[Python\] - Support show function for DataFrame api of python library [\#942](https://github.com/apache/arrow-datafusion/pull/942) ([francis-du](https://github.com/francis-du)) +- Rework the python bindings using conversion traits from arrow-rs [\#873](https://github.com/apache/arrow-datafusion/pull/873) ([kszucs](https://github.com/kszucs)) + +**Fixed bugs:** + +- Error in `python test` check / maturn python build: `function or associated item not found in `proc_macro::Literal` [\#961](https://github.com/apache/arrow-datafusion/issues/961) +- Use UUID to create unique table names in python binding [\#1111](https://github.com/apache/arrow-datafusion/pull/1111) ([hippowdon](https://github.com/hippowdon)) +- python: fix generated table name in dataframe creation [\#1078](https://github.com/apache/arrow-datafusion/pull/1078) ([houqp](https://github.com/houqp)) +- fix: joins on Timestamp columns [\#1055](https://github.com/apache/arrow-datafusion/pull/1055) ([francis-du](https://github.com/francis-du)) +- register datafusion.functions as a python package [\#995](https://github.com/apache/arrow-datafusion/pull/995) ([houqp](https://github.com/houqp)) + +**Documentation updates:** + +- python: update docs to use new APIs [\#1287](https://github.com/apache/arrow-datafusion/pull/1287) ([houqp](https://github.com/houqp)) +- Fix typo on Python functions [\#1207](https://github.com/apache/arrow-datafusion/pull/1207) ([j-a-m-l](https://github.com/j-a-m-l)) +- fix deadlink in python/readme [\#1002](https://github.com/apache/arrow-datafusion/pull/1002) ([waynexia](https://github.com/waynexia)) + +**Performance improvements:** + +- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/arrow-datafusion/pull/1137) ([houqp](https://github.com/houqp)) + +**Closed issues:** + +- InList expr with NULL literals do not work [\#1190](https://github.com/apache/arrow-datafusion/issues/1190) +- update the homepage README to include values, `approx_distinct`, etc. [\#1171](https://github.com/apache/arrow-datafusion/issues/1171) +- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/arrow-datafusion/issues/1011) +- Wanting to contribute to project where to start? 
[\#983](https://github.com/apache/arrow-datafusion/issues/983) +- delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) +- \[Python\]: register custom datasource [\#906](https://github.com/apache/arrow-datafusion/issues/906) +- How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) +- Produce a design for a metrics framework [\#21](https://github.com/apache/arrow-datafusion/issues/21) + + +For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) + +## [python-0.3.0](https://github.com/apache/arrow-datafusion/tree/python-0.3.0) (2021-08-10) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...python-0.3.0) + +**Implemented enhancements:** + +- add more math functions and unit tests to `python` crate [\#748](https://github.com/apache/arrow-datafusion/pull/748) ([Jimexist](https://github.com/Jimexist)) +- Expose ExecutionContext.register\_csv to the python bindings [\#524](https://github.com/apache/arrow-datafusion/pull/524) ([kszucs](https://github.com/kszucs)) +- Implement missing join types for Python dataframe [\#503](https://github.com/apache/arrow-datafusion/pull/503) ([Dandandan](https://github.com/Dandandan)) +- Add missing functions to python [\#388](https://github.com/apache/arrow-datafusion/pull/388) ([jgoday](https://github.com/jgoday)) + +**Fixed bugs:** + +- fix maturin version in pyproject.toml [\#756](https://github.com/apache/arrow-datafusion/pull/756) ([Jimexist](https://github.com/Jimexist)) +- fix pyarrow type id mapping in `python` crate [\#742](https://github.com/apache/arrow-datafusion/pull/742) ([Jimexist](https://github.com/Jimexist)) + +**Closed issues:** + +- Confirm git tagging strategy for releases [\#770](https://github.com/apache/arrow-datafusion/issues/770) +- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) +- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) +- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) +- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) +- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) +- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) +- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) +- Implement hash-partitioned hash aggregate [\#27](https://github.com/apache/arrow-datafusion/issues/27) +- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) +- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) + +**Merged pull requests:** + +- fix python binding for `concat`, `concat_ws`, and `random` [\#768](https://github.com/apache/arrow-datafusion/pull/768) 
([Jimexist](https://github.com/Jimexist)) +- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) +- fix python crate with the changes to logical plan builder [\#650](https://github.com/apache/arrow-datafusion/pull/650) ([Jimexist](https://github.com/Jimexist)) +- use nightly nightly-2021-05-10 [\#536](https://github.com/apache/arrow-datafusion/pull/536) ([Jimexist](https://github.com/Jimexist)) +- Define the unittests using pytest [\#493](https://github.com/apache/arrow-datafusion/pull/493) ([kszucs](https://github.com/kszucs)) +- use requirements.txt to formalize python deps [\#484](https://github.com/apache/arrow-datafusion/pull/484) ([Jimexist](https://github.com/Jimexist)) +- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) +- simplify python function definitions [\#477](https://github.com/apache/arrow-datafusion/pull/477) ([Jimexist](https://github.com/Jimexist)) +- Expose DataFrame::sort in the python bindings [\#469](https://github.com/apache/arrow-datafusion/pull/469) ([kszucs](https://github.com/kszucs)) +- Revert "Revert "Add datafusion-python \(\#69\)" \(\#257\)" [\#270](https://github.com/apache/arrow-datafusion/pull/270) ([andygrove](https://github.com/andygrove)) +- Revert "Add datafusion-python \(\#69\)" [\#257](https://github.com/apache/arrow-datafusion/pull/257) ([andygrove](https://github.com/andygrove)) +- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) +- Add datafusion-python [\#69](https://github.com/apache/arrow-datafusion/pull/69) ([jorgecarleitao](https://github.com/jorgecarleitao)) + + + +\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* diff --git a/python/Cargo.lock b/python/Cargo.lock new file mode 100644 index 000000000..74c6e9000 --- /dev/null +++ b/python/Cargo.lock @@ -0,0 +1,1464 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom 0.2.3", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "arrayref" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" + +[[package]] +name = "arrayvec" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" + +[[package]] +name = "arrow" +version = "6.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "216c6846a292bdd93c2b93c1baab58c32ff50e2ab5e8d50db333ab518535dd8b" +dependencies = [ + "bitflags", + "chrono", + "comfy-table", + "csv", + "flatbuffers", + "hex", + "indexmap", + "lazy_static", + "lexical-core", + "multiversion", + "num", + "pyo3", + "rand 0.8.4", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "async-trait" +version = "0.1.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44318e776df68115a881de9a8fd1b9e53368d7a4a5ce4cc48517da3393233a5e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "blake2" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a4e37d16930f5459780f5621038b6382b9bb37c19016f39fb6b5808d831f174" +dependencies = [ + "crypto-mac", + "digest", + "opaque-debug", +] + +[[package]] +name = "blake3" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2607a74355ce2e252d0c483b2d8a348e1bba36036e786ccc2dcd777213c86ffd" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", + "digest", +] + +[[package]] 
+name = "block-buffer" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71cb90ade945043d3d53597b2fc359bb063db8ade2bcffe7997351d0756e9d50" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bstr" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cc" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "comfy-table" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "constant_time_eq" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" + +[[package]] +name = "cpufeatures" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crypto-mac" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab" +dependencies = [ + "generic-array", + "subtle", +] + +[[package]] +name = "csv" +version = "1.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = 
"csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "datafusion" +version = "6.0.0" +source = "git+https://github.com/apache/arrow-datafusion?rev=7607ace992a5a42840bf546221a8635e70e10885#7607ace992a5a42840bf546221a8635e70e10885" +dependencies = [ + "ahash", + "arrow", + "async-trait", + "blake2", + "blake3", + "chrono", + "futures", + "hashbrown", + "lazy_static", + "log", + "md-5", + "num_cpus", + "ordered-float 2.8.0", + "parquet", + "paste 1.0.5", + "pin-project-lite", + "pyo3", + "rand 0.8.4", + "regex", + "sha2", + "smallvec", + "sqlparser", + "tokio", + "tokio-stream", + "unicode-segmentation", +] + +[[package]] +name = "datafusion-python" +version = "0.4.0" +dependencies = [ + "datafusion", + "pyo3", + "rand 0.7.3", + "tokio", + "uuid", +] + +[[package]] +name = "digest" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +dependencies = [ + "generic-array", +] + +[[package]] +name = "flatbuffers" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" +dependencies = [ + "bitflags", + "smallvec", + "thiserror", +] + +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "futures" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d" + +[[package]] +name = "futures-executor" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377" + +[[package]] +name = "futures-macro" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb" +dependencies = [ + "autocfg", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11" + +[[package]] +name = "futures-task" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99" + +[[package]] +name = "futures-util" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481" +dependencies = [ + "autocfg", + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "proc-macro-hack", + "proc-macro-nested", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.10.2+wasi-snapshot-preview1", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "indexmap" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" +dependencies = [ + "indoc-impl", + "proc-macro-hack", +] + +[[package]] +name = "indoc-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", + "unindent", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + 
"cfg-if", +] + +[[package]] +name = "integer-encoding" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a3926d8f156019890be4abe5fd3785e0cff1001e06f59c597641fd513a5a284" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + "lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4d066d004fa762d9da995ed21aa8845bb9f6e4265f540d716fb4b315197bf0e" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2c92badda8cc0fc4f3d3cc1c30aaefafb830510c8781ce4e8669881f3ed53ac" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ff669ccaae16ee33af90dc51125755efed17f1309626ba5c12052512b11e291" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b5186948c7b297abaaa51560f2581dae625e5ce7dfc2d8fdc56345adb6dc576" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ece956492e0e40fd95ef8658a34d53a3b8c2015762fdcaaff2167b28de1f56ef" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.105" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013" + +[[package]] +name = "lock_api" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lz4" +version = "1.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aac20ed6991e01bf6a2e68cc73df2b389707403662a8ba89f68511fb340f724c" 
+dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "md-5" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +dependencies = [ + "block-buffer", + "digest", + "opaque-debug", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "multiversion" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" +dependencies = [ + "multiversion-macros", +] + +[[package]] +name = "multiversion-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "num" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74e768dff5fb39a41b3bcd30bb25cf989706c90d028d1ad71971987aa309d535" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" + +[[package]] +name = "opaque-debug" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" + +[[package]] +name = "ordered-float" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" +dependencies = [ + "num-traits", +] + +[[package]] +name = "ordered-float" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97c9d06878b3a851e8026ef94bf7fef9ba93062cd412601da4d9cf369b1cc62d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "parquet" +version = "6.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "788d9953f4cfbe9db1beff7bebd54299d105e34680d78b82b1ddc85d432cac9d" +dependencies = [ + "arrow", + "base64", + "brotli", + "byteorder", + "chrono", + "flate2", + "lz4", + "num-bigint", + "parquet-format", + "rand 0.8.4", + "snap", + "thrift", + "zstd", +] + +[[package]] +name = "parquet-format" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5bc6b23543b5dedc8f6cce50758a35e5582e148e0cfa26bd0cacd569cda5b71" +dependencies = [ + "thrift", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "ppv-lite86" +version = "0.2.15" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro-nested" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" + +[[package]] +name = "proc-macro2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edc3358ebc67bc8b7fa0c007f945b0b18226f78437d61bec735a9eb96b61ee70" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "pyo3" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35100f9347670a566a67aa623369293703322bb9db77d99d7df7313b575ae0c8" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "parking_lot", + "paste 0.1.18", + "pyo3-build-config", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d12961738cacbd7f91b7c43bc25cfeeaa2698ad07a04b3be0aa88b950865738f" +dependencies = [ + "once_cell", +] + +[[package]] +name = "pyo3-macros" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0bc5215d704824dfddddc03f93cb572e1155c68b6761c37005e1c288808ea8" +dependencies = [ + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71623fc593224afaab918aa3afcaf86ed2f43d34f6afde7f3922608f253240df" +dependencies = [ + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc 0.2.0", +] + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha 0.3.1", + "rand_core 0.6.3", + "rand_hc 0.3.1", +] + +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core 0.6.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" +dependencies = [ + "getrandom 0.1.16", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom 0.2.3", +] + +[[package]] +name = "rand_hc" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" +dependencies = [ + "rand_core 0.5.1", +] + +[[package]] +name = "rand_hc" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" +dependencies = [ + "rand_core 0.6.3", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" + +[[package]] +name = "serde_derive" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +dependencies = [ + "indexmap", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" +dependencies = [ + "block-buffer", + "cfg-if", + "cpufeatures", + "digest", + "opaque-debug", +] + +[[package]] +name = "slab" +version = "0.4.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" + +[[package]] +name = "smallvec" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" + +[[package]] +name = "snap" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" + +[[package]] +name = "sqlparser" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9907f54bd0f7b6ce72c2be1e570a614819ee08e3deb66d90480df341d8a12a8" +dependencies = [ + "log", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + +[[package]] +name = "syn" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "threadpool" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" +dependencies = [ + "num_cpus", +] + +[[package]] +name = "thrift" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" +dependencies = [ + "byteorder", + "integer-encoding", + "log", + "ordered-float 1.1.1", + "threadpool", +] + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "tokio" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2c2416fdedca8443ae44b4527de1ea633af61d8f7169ffa6e72c5b53d24efcc" +dependencies = [ + "autocfg", + "num_cpus", + 
"pin-project-lite", + "tokio-macros", +] + +[[package]] +name = "tokio-macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2dd85aeaba7b68df939bd357c6afb36c87951be9e80bf9c859f2fc3e9fca0fd" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-stream" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2f3f698253f03119ac0102beaa64f67a67e08074d03a22d18784104543727f" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-width" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "unindent" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" + +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom 0.2.3", +] + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "zstd" +version = "0.9.0+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" +dependencies = [ + "zstd-safe", +] + +[[package]] 
+name = "zstd-safe" +version = "4.1.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "1.6.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" +dependencies = [ + "cc", + "libc", +] diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 000000000..a71ce3c49 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-python" +version = "0.4.0" +homepage = "https://github.com/apache/arrow" +repository = "https://github.com/apache/arrow" +authors = ["Apache Arrow "] +description = "Build and run queries against data" +readme = "README.md" +license = "Apache-2.0" +edition = "2021" +rust-version = "1.57" + +[package.metadata.maturin] +name = "datafusion._internal" + +[dependencies] +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "7607ace992a5a42840bf546221a8635e70e10885", features = ["pyarrow"] } +pyo3 = { version = "0.14", features = ["extension-module", "abi3", "abi3-py36"] } +rand = "0.7" +tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } +uuid = { version = "0.8", features = ["v4"] } + +[lib] +crate-type = ["cdylib"] +name = "_internal" + +[profile.release] +codegen-units = 1 +lto = true diff --git a/python/LICENSE.txt b/python/LICENSE.txt new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/python/LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/python/README.md b/python/README.md index b3a2a3061..5979803dc 100644 --- a/python/README.md +++ b/python/README.md @@ -17,6 +17,161 @@ under the License. --> -# DataFusion in Python +## DataFusion in Python -This directory is now moved to its [dedicated repository](https://github.com/datafusion-contrib/datafusion-python). 
+This is a Python library that binds to [DataFusion](https://github.com/apache/arrow-datafusion), an in-memory query engine built on [Apache Arrow](https://arrow.apache.org/).
+
+Like PySpark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, Parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python.
+
+It also allows you to use UDFs and UDAFs for complex operations.
+
+The major advantage of this library over other execution engines is that it achieves zero-copy between Python and its execution engine: there is no extra cost in using UDFs and UDAFs, or in collecting the results back to Python, apart from having to lock the GIL when running those operations.
+
+Its query engine, DataFusion, is written in [Rust](https://www.rust-lang.org/), which provides strong guarantees around thread safety and memory safety.
+
+Technically, zero-copy is achieved via the Arrow [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
+
+## How to use it
+
+Simple usage:
+
+```python
+import datafusion
+import pyarrow
+
+# an alias
+f = datafusion.functions
+
+# create a context
+ctx = datafusion.ExecutionContext()
+
+# create a RecordBatch and a new DataFrame from it
+batch = pyarrow.RecordBatch.from_arrays(
+    [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])],
+    names=["a", "b"],
+)
+df = ctx.create_dataframe([[batch]])
+
+# create a new statement
+df = df.select(
+    f.col("a") + f.col("b"),
+    f.col("a") - f.col("b"),
+)
+
+# execute and collect the first (and only) batch
+result = df.collect()[0]
+
+assert result.column(0) == pyarrow.array([5, 7, 9])
+assert result.column(1) == pyarrow.array([-3, -3, -3])
+```
+
+### UDFs
+
+```python
+def is_null(array: pyarrow.Array) -> pyarrow.Array:
+    return array.is_null()
+
+udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_())
+
+df = df.select(udf(f.col("a")))
+```
+
+### UDAF
+
+```python
+import pyarrow
+import pyarrow.compute
+
+
+class Accumulator:
+    """
+    Interface of a user-defined accumulation.
+    """
+    def __init__(self):
+        self._sum = pyarrow.scalar(0.0)
+
+    def to_scalars(self) -> [pyarrow.Scalar]:
+        return [self._sum]
+
+    def update(self, values: pyarrow.Array) -> None:
+        # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+        self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py())
+
+    def merge(self, states: pyarrow.Array) -> None:
+        # not nice since pyarrow scalars can't be summed yet. This breaks on `None`
+        self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py())
+
+    def evaluate(self) -> pyarrow.Scalar:
+        return self._sum
+
+
+df = ...
+
+udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()])
+
+df = df.aggregate(
+    [],
+    [udaf(f.col("a"))]
+)
+```
+
+## How to install (from pip)
+
+```bash
+pip install datafusion
+# or
+python -m pip install datafusion
+```
+
+## How to develop
+
+This assumes that you have Rust and Cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin).
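+
+Once `maturin develop` succeeds (see the Bootstrap steps below), a quick smoke test can confirm that the extension module builds and runs. This is only a minimal sketch reusing the API shown above, not part of the test suite, and it assumes the virtual environment from the steps below is active:
+
+```python
+import pyarrow
+
+import datafusion
+
+f = datafusion.functions
+
+# build a tiny in-memory table and run one query end to end
+ctx = datafusion.ExecutionContext()
+batch = pyarrow.RecordBatch.from_arrays(
+    [pyarrow.array([1, 2, 3])],
+    names=["a"],
+)
+df = ctx.create_dataframe([[batch]])
+
+# aggregate column "a"; collect() returns a list of RecordBatches
+result = df.aggregate([], [f.min(f.col("a")), f.max(f.col("a"))]).collect()[0]
+
+assert result.column(0) == pyarrow.array([1])
+assert result.column(1) == pyarrow.array([3])
+print("datafusion bindings look OK")
+```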
+
+Bootstrap:
+
+```bash
+# fetch this repo
+git clone git@github.com:apache/arrow-datafusion.git
+# change to python directory
+cd arrow-datafusion/python
+# prepare development environment (used to build wheel / install in development)
+python3 -m venv venv
+# activate the venv
+source venv/bin/activate
+# update pip itself if necessary
+python -m pip install -U pip
+# if python -V gives python 3.7
+python -m pip install -r requirements-37.txt
+# if python -V gives python 3.8/3.9/3.10
+python -m pip install -r requirements.txt
+```
+
+Whenever Rust code changes (your changes or via `git pull`):
+
+```bash
+# make sure you activate the venv using "source venv/bin/activate" first
+maturin develop
+python -m pytest
+```
+
+## How to update dependencies
+
+To change test dependencies, edit `requirements.in` and run
+
+```bash
+# install pip-tools (this can be done only once), also consider running in venv
+python -m pip install pip-tools
+
+# change requirements.in and then run
+python -m piptools compile --generate-hashes -o requirements-37.txt
+# or run this if you are on python 3.8/3.9/3.10
+python -m piptools compile --generate-hashes -o requirements.txt
+```
+
+To update dependencies, run with `-U`
+
+```bash
+python -m piptools compile -U --generate-hashes -o requirements-310.txt
+```
+
+More details [here](https://github.com/jazzband/pip-tools)
diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py
new file mode 100644
index 000000000..0a25592f8
--- /dev/null
+++ b/python/datafusion/__init__.py
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
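+
+"""Python bindings for the DataFusion query engine.
+
+The heavy lifting happens in the Rust extension module ``datafusion._internal``
+(built with pyo3 and maturin, see ``Cargo.toml``); this package re-exports its
+main classes and adds the ``udf`` and ``udaf`` helper factories defined below.
+"""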
+ +from abc import ABCMeta, abstractmethod +from typing import List + +import pyarrow as pa + +from ._internal import ( + AggregateUDF, + DataFrame, + ExecutionContext, + Expression, + ScalarUDF, +) + + +__all__ = [ + "DataFrame", + "ExecutionContext", + "Expression", + "AggregateUDF", + "ScalarUDF", + "column", + "literal", +] + + +class Accumulator(metaclass=ABCMeta): + @abstractmethod + def state(self) -> List[pa.Scalar]: + pass + + @abstractmethod + def update(self, values: pa.Array) -> None: + pass + + @abstractmethod + def merge(self, states: pa.Array) -> None: + pass + + @abstractmethod + def evaluate(self) -> pa.Scalar: + pass + + +def column(value): + return Expression.column(value) + + +col = column + + +def literal(value): + if not isinstance(value, pa.Scalar): + value = pa.scalar(value) + return Expression.literal(value) + + +lit = literal + + +def udf(func, input_types, return_type, volatility, name=None): + """ + Create a new User Defined Function + """ + if not callable(func): + raise TypeError("`func` argument must be callable") + if name is None: + name = func.__qualname__ + return ScalarUDF( + name=name, + func=func, + input_types=input_types, + return_type=return_type, + volatility=volatility, + ) + + +def udaf(accum, input_type, return_type, state_type, volatility, name=None): + """ + Create a new User Defined Aggregate Function + """ + if not issubclass(accum, Accumulator): + raise TypeError( + "`accum` must implement the abstract base class Accumulator" + ) + if name is None: + name = accum.__qualname__ + return AggregateUDF( + name=name, + accumulator=accum, + input_type=input_type, + return_type=return_type, + state_type=state_type, + volatility=volatility, + ) diff --git a/python/datafusion/functions.py b/python/datafusion/functions.py new file mode 100644 index 000000000..782ecba22 --- /dev/null +++ b/python/datafusion/functions.py @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +from ._internal import functions + + +def __getattr__(name): + return getattr(functions, name) diff --git a/python/datafusion/tests/__init__.py b/python/datafusion/tests/__init__.py new file mode 100644 index 000000000..13a83393a --- /dev/null +++ b/python/datafusion/tests/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/python/datafusion/tests/generic.py b/python/datafusion/tests/generic.py new file mode 100644 index 000000000..1f984a40a --- /dev/null +++ b/python/datafusion/tests/generic.py @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import datetime + +import numpy as np +import pyarrow as pa +import pyarrow.csv + +# used to write parquet files +import pyarrow.parquet as pq + + +def data(): + np.random.seed(1) + data = np.concatenate( + [ + np.random.normal(0, 0.01, size=50), + np.random.normal(50, 0.01, size=50), + ] + ) + return pa.array(data) + + +def data_with_nans(): + np.random.seed(0) + data = np.random.normal(0, 0.01, size=50) + mask = np.random.randint(0, 2, size=50) + data[mask == 0] = np.NaN + return data + + +def data_datetime(f): + data = [ + datetime.datetime.now(), + datetime.datetime.now() - datetime.timedelta(days=1), + datetime.datetime.now() + datetime.timedelta(days=1), + ] + return pa.array( + data, type=pa.timestamp(f), mask=np.array([False, True, False]) + ) + + +def data_date32(): + data = [ + datetime.date(2000, 1, 1), + datetime.date(1980, 1, 1), + datetime.date(2030, 1, 1), + ] + return pa.array( + data, type=pa.date32(), mask=np.array([False, True, False]) + ) + + +def data_timedelta(f): + data = [ + datetime.timedelta(days=100), + datetime.timedelta(days=1), + datetime.timedelta(seconds=1), + ] + return pa.array( + data, type=pa.duration(f), mask=np.array([False, True, False]) + ) + + +def data_binary_other(): + return np.array([1, 0, 0], dtype="u4") + + +def write_parquet(path, data): + table = pa.Table.from_arrays([data], names=["a"]) + pq.write_table(table, path) + return str(path) diff --git a/python/datafusion/tests/test_aggregation.py b/python/datafusion/tests/test_aggregation.py new file mode 100644 index 000000000..d539c4458 --- /dev/null +++ b/python/datafusion/tests/test_aggregation.py @@ -0,0 +1,48 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pyarrow as pa +import pytest + +from datafusion import ExecutionContext, column +from datafusion import functions as f + + +@pytest.fixture +def df(): + ctx = ExecutionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_built_in_aggregation(df): + col_a = column("a") + col_b = column("b") + df = df.aggregate( + [], + [f.max(col_a), f.min(col_a), f.count(col_a), f.approx_distinct(col_b)], + ) + result = df.collect()[0] + assert result.column(0) == pa.array([3]) + assert result.column(1) == pa.array([1]) + assert result.column(2) == pa.array([3], type=pa.uint64()) + assert result.column(3) == pa.array([2], type=pa.uint64()) diff --git a/python/datafusion/tests/test_catalog.py b/python/datafusion/tests/test_catalog.py new file mode 100644 index 000000000..2e64a810a --- /dev/null +++ b/python/datafusion/tests/test_catalog.py @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import ExecutionContext + + +@pytest.fixture +def ctx(): + return ExecutionContext() + + +@pytest.fixture +def database(ctx, tmp_path): + path = tmp_path / "test.csv" + + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + ctx.register_csv("csv1", str(path)) + ctx.register_csv( + "csv2", + path, + has_header=True, + delimiter=",", + schema_infer_max_records=10, + ) + + +def test_basic(ctx, database): + with pytest.raises(KeyError): + ctx.catalog("non-existent") + + default = ctx.catalog() + assert default.names() == ["public"] + + for database in [default.database("public"), default.database()]: + assert database.names() == {"csv1", "csv", "csv2"} + + table = database.table("csv") + assert table.kind == "physical" + assert table.schema == pa.schema( + [ + pa.field("int", pa.int64(), nullable=False), + pa.field("str", pa.string(), nullable=False), + pa.field("float", pa.float64(), nullable=False), + ] + ) diff --git a/python/datafusion/tests/test_context.py b/python/datafusion/tests/test_context.py new file mode 100644 index 000000000..60beea4a0 --- /dev/null +++ b/python/datafusion/tests/test_context.py @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import ExecutionContext + + +@pytest.fixture +def ctx(): + return ExecutionContext() + + +def test_register_record_batches(ctx): + # create a RecordBatch and register it as memtable + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + ctx.register_record_batches("t", [[batch]]) + + assert ctx.tables() == {"t"} + + result = ctx.sql("SELECT a+b, a-b FROM t").collect() + + assert result[0].column(0) == pa.array([5, 7, 9]) + assert result[0].column(1) == pa.array([-3, -3, -3]) + + +def test_create_dataframe_registers_unique_table_name(ctx): + # create a RecordBatch and register it as memtable + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + df = ctx.create_dataframe([[batch]]) + tables = list(ctx.tables()) + + assert df + assert len(tables) == 1 + assert len(tables[0]) == 33 + assert tables[0].startswith("c") + # ensure that the rest of the table name contains + # only hexadecimal numbers + for c in tables[0][1:]: + assert c in "0123456789abcdef" diff --git a/python/datafusion/tests/test_dataframe.py b/python/datafusion/tests/test_dataframe.py new file mode 100644 index 000000000..9a97c25f2 --- /dev/null +++ b/python/datafusion/tests/test_dataframe.py @@ -0,0 +1,181 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
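The context tests above show the core workflow of these bindings: build a pyarrow RecordBatch, register it with an ExecutionContext, and query it with SQL. A minimal sketch of that round trip, assuming only the calls already exercised in the tests; the table name "t" and the sample data are illustrative:

import pyarrow as pa
from datafusion import ExecutionContext

ctx = ExecutionContext()

# register an in-memory table named "t" from a single RecordBatch
batch = pa.RecordBatch.from_arrays(
    [pa.array([1, 2, 3]), pa.array([4, 5, 6])],
    names=["a", "b"],
)
ctx.register_record_batches("t", [[batch]])

# run SQL and collect the result as a list of RecordBatches
batches = ctx.sql("SELECT a + b AS s FROM t").collect()
table = pa.Table.from_batches(batches)
# expected: table.to_pydict() == {"s": [5, 7, 9]}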
+ +import pyarrow as pa +import pytest + +from datafusion import functions as f +from datafusion import DataFrame, ExecutionContext, column, literal, udf + + +@pytest.fixture +def df(): + ctx = ExecutionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +@pytest.fixture +def struct_df(): + ctx = ExecutionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([{"c": 1}, {"c": 2}, {"c": 3}]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + + return ctx.create_dataframe([[batch]]) + + +def test_select(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([5, 7, 9]) + assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_filter(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ).filter(column("a") > literal(2)) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([9]) + assert result.column(1) == pa.array([-3]) + + +def test_sort(df): + df = df.sort(column("b").sort(ascending=False)) + + table = pa.Table.from_batches(df.collect()) + expected = {"a": [3, 2, 1], "b": [6, 5, 4]} + + assert table.to_pydict() == expected + + +def test_limit(df): + df = df.limit(1) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert len(result.column(0)) == 1 + assert len(result.column(1)) == 1 + + +def test_udf(df): + # is_null is a pa function over arrays + is_null = udf( + lambda x: x.is_null(), + [pa.int64()], + pa.bool_(), + volatility="immutable", + ) + + df = df.select(is_null(column("a"))) + result = df.collect()[0].column(0) + + assert result == pa.array([False, False, False]) + + +def test_join(): + ctx = ExecutionContext() + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + df = ctx.create_dataframe([[batch]]) + + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2]), pa.array([8, 10])], + names=["a", "c"], + ) + df1 = ctx.create_dataframe([[batch]]) + + df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") + df = df.sort(column("a").sort(ascending=True)) + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} + assert table.to_pydict() == expected + + +def test_window_lead(df): + df = df.select( + column("a"), + f.alias( + f.window( + "lead", [column("b")], order_by=[f.order_by(column("b"))] + ), + "a_next", + ), + ) + + table = pa.Table.from_batches(df.collect()) + + expected = {"a": [1, 2, 3], "a_next": [5, 6, None]} + assert table.to_pydict() == expected + + +def test_get_dataframe(tmp_path): + ctx = ExecutionContext() + + path = tmp_path / "test.csv" + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + + df = ctx.table("csv") + assert isinstance(df, DataFrame) + + +def test_struct_select(struct_df): + df = struct_df.select( + column("a")["c"] + column("b"), + column("a")["c"] - column("b"), + ) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([5, 7, 
9]) + assert result.column(1) == pa.array([-3, -3, -3]) diff --git a/python/datafusion/tests/test_functions.py b/python/datafusion/tests/test_functions.py new file mode 100644 index 000000000..84718eaf0 --- /dev/null +++ b/python/datafusion/tests/test_functions.py @@ -0,0 +1,219 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import numpy as np +import pyarrow as pa +import pytest + +from datafusion import ExecutionContext, column +from datafusion import functions as f +from datafusion import literal + + +@pytest.fixture +def df(): + ctx = ExecutionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array(["Hello", "World", "!"]), pa.array([4, 5, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_literal(df): + df = df.select( + literal(1), + literal("1"), + literal("OK"), + literal(3.14), + literal(True), + literal(b"hello world"), + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array([1] * 3) + assert result.column(1) == pa.array(["1"] * 3) + assert result.column(2) == pa.array(["OK"] * 3) + assert result.column(3) == pa.array([3.14] * 3) + assert result.column(4) == pa.array([True] * 3) + assert result.column(5) == pa.array([b"hello world"] * 3) + + +def test_lit_arith(df): + """ + Test literals with arithmetic operations + """ + df = df.select( + literal(1) + column("b"), f.concat(column("a"), literal("!")) + ) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array([5, 6, 7]) + assert result.column(1) == pa.array(["Hello!", "World!", "!!"]) + + +def test_math_functions(): + ctx = ExecutionContext() + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([0.1, -0.7, 0.55])], names=["value"] + ) + df = ctx.create_dataframe([[batch]]) + + values = np.array([0.1, -0.7, 0.55]) + col_v = column("value") + df = df.select( + f.abs(col_v), + f.sin(col_v), + f.cos(col_v), + f.tan(col_v), + f.asin(col_v), + f.acos(col_v), + f.exp(col_v), + f.ln(col_v + literal(pa.scalar(1))), + f.log2(col_v + literal(pa.scalar(1))), + f.log10(col_v + literal(pa.scalar(1))), + f.random(), + ) + batches = df.collect() + assert len(batches) == 1 + result = batches[0] + + np.testing.assert_array_almost_equal(result.column(0), np.abs(values)) + np.testing.assert_array_almost_equal(result.column(1), np.sin(values)) + np.testing.assert_array_almost_equal(result.column(2), np.cos(values)) + np.testing.assert_array_almost_equal(result.column(3), np.tan(values)) + np.testing.assert_array_almost_equal(result.column(4), np.arcsin(values)) + np.testing.assert_array_almost_equal(result.column(5), np.arccos(values)) + 
np.testing.assert_array_almost_equal(result.column(6), np.exp(values)) + np.testing.assert_array_almost_equal( + result.column(7), np.log(values + 1.0) + ) + np.testing.assert_array_almost_equal( + result.column(8), np.log2(values + 1.0) + ) + np.testing.assert_array_almost_equal( + result.column(9), np.log10(values + 1.0) + ) + np.testing.assert_array_less(result.column(10), np.ones_like(values)) + + +def test_string_functions(df): + df = df.select(f.md5(column("a")), f.lower(column("a"))) + result = df.collect() + assert len(result) == 1 + result = result[0] + assert result.column(0) == pa.array( + [ + "8b1a9953c4611296a827abf8c47804d7", + "f5a7924e621e84c9280a9a27e1bcb7f6", + "9033e0e305f247c0c3c80d0c7848c8b3", + ] + ) + assert result.column(1) == pa.array(["hello", "world", "!"]) + + +def test_hash_functions(df): + exprs = [ + f.digest(column("a"), literal(m)) + for m in ("md5", "sha256", "sha512", "blake2s", "blake3") + ] + df = df.select(*exprs) + result = df.collect() + assert len(result) == 1 + result = result[0] + b = bytearray.fromhex + assert result.column(0) == pa.array( + [ + b("8B1A9953C4611296A827ABF8C47804D7"), + b("F5A7924E621E84C9280A9A27E1BCB7F6"), + b("9033E0E305F247C0C3C80D0C7848C8B3"), + ] + ) + assert result.column(1) == pa.array( + [ + b( + "185F8DB32271FE25F561A6FC938B2E26" + "4306EC304EDA518007D1764826381969" + ), + b( + "78AE647DC5544D227130A0682A51E30B" + "C7777FBB6D8A8F17007463A3ECD1D524" + ), + b( + "BB7208BC9B5D7C04F1236A82A0093A5E" + "33F40423D5BA8D4266F7092C3BA43B62" + ), + ] + ) + assert result.column(2) == pa.array( + [ + b( + "3615F80C9D293ED7402687F94B22D58E" + "529B8CC7916F8FAC7FDDF7FBD5AF4CF7" + "77D3D795A7A00A16BF7E7F3FB9561EE9" + "BAAE480DA9FE7A18769E71886B03F315" + ), + b( + "8EA77393A42AB8FA92500FB077A9509C" + "C32BC95E72712EFA116EDAF2EDFAE34F" + "BB682EFDD6C5DD13C117E08BD4AAEF71" + "291D8AACE2F890273081D0677C16DF0F" + ), + b( + "3831A6A6155E509DEE59A7F451EB3532" + "4D8F8F2DF6E3708894740F98FDEE2388" + "9F4DE5ADB0C5010DFB555CDA77C8AB5D" + "C902094C52DE3278F35A75EBC25F093A" + ), + ] + ) + assert result.column(3) == pa.array( + [ + b( + "F73A5FBF881F89B814871F46E26AD3FA" + "37CB2921C5E8561618639015B3CCBB71" + ), + b( + "B792A0383FB9E7A189EC150686579532" + "854E44B71AC394831DAED169BA85CCC5" + ), + b( + "27988A0E51812297C77A433F63523334" + "6AEE29A829DCF4F46E0F58F402C6CFCB" + ), + ] + ) + assert result.column(4) == pa.array( + [ + b( + "FBC2B0516EE8744D293B980779178A35" + "08850FDCFE965985782C39601B65794F" + ), + b( + "BF73D18575A736E4037D45F9E316085B" + "86C19BE6363DE6AA789E13DEAACC1C4E" + ), + b( + "C8D11B9F7237E4034ADBCD2005735F9B" + "C4C597C75AD89F4492BEC8F77D15F7EB" + ), + ] + ) diff --git a/python/datafusion/tests/test_imports.py b/python/datafusion/tests/test_imports.py new file mode 100644 index 000000000..423800248 --- /dev/null +++ b/python/datafusion/tests/test_imports.py @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import pytest + +import datafusion +from datafusion import ( + AggregateUDF, + DataFrame, + ExecutionContext, + Expression, + ScalarUDF, + functions, +) + + +def test_import_datafusion(): + assert datafusion.__name__ == "datafusion" + + +def test_class_module_is_datafusion(): + for klass in [ + ExecutionContext, + Expression, + DataFrame, + ScalarUDF, + AggregateUDF, + ]: + assert klass.__module__ == "datafusion" + + +def test_import_from_functions_submodule(): + from datafusion.functions import abs, sin # noqa + + assert functions.abs is abs + assert functions.sin is sin + + msg = "cannot import name 'foobar' from 'datafusion.functions'" + with pytest.raises(ImportError, match=msg): + from datafusion.functions import foobar # noqa + + +def test_classes_are_inheritable(): + class MyExecContext(ExecutionContext): + pass + + class MyExpression(Expression): + pass + + class MyDataFrame(DataFrame): + pass diff --git a/python/datafusion/tests/test_sql.py b/python/datafusion/tests/test_sql.py new file mode 100644 index 000000000..23f20079f --- /dev/null +++ b/python/datafusion/tests/test_sql.py @@ -0,0 +1,250 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import numpy as np +import pyarrow as pa +import pytest + +from datafusion import ExecutionContext, udf + +from . 
import generic as helpers + + +@pytest.fixture +def ctx(): + return ExecutionContext() + + +def test_no_table(ctx): + with pytest.raises(Exception, match="DataFusion error"): + ctx.sql("SELECT a FROM b").collect() + + +def test_register_csv(ctx, tmp_path): + path = tmp_path / "test.csv" + + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + ctx.register_csv("csv1", str(path)) + ctx.register_csv( + "csv2", + path, + has_header=True, + delimiter=",", + schema_infer_max_records=10, + ) + alternative_schema = pa.schema( + [ + ("some_int", pa.int16()), + ("some_bytes", pa.string()), + ("some_floats", pa.float32()), + ] + ) + ctx.register_csv("csv3", path, schema=alternative_schema) + + assert ctx.tables() == {"csv", "csv1", "csv2", "csv3"} + + for table in ["csv", "csv1", "csv2"]: + result = ctx.sql(f"SELECT COUNT(int) AS cnt FROM {table}").collect() + result = pa.Table.from_batches(result) + assert result.to_pydict() == {"cnt": [4]} + + result = ctx.sql("SELECT * FROM csv3").collect() + result = pa.Table.from_batches(result) + assert result.schema == alternative_schema + + with pytest.raises( + ValueError, match="Delimiter must be a single character" + ): + ctx.register_csv("csv4", path, delimiter="wrong") + + +def test_register_parquet(ctx, tmp_path): + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) + ctx.register_parquet("t", path) + assert ctx.tables() == {"t"} + + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() + result = pa.Table.from_batches(result) + assert result.to_pydict() == {"cnt": [100]} + + +def test_execute(ctx, tmp_path): + data = [1, 1, 2, 2, 3, 11, 12] + + # single column, "a" + path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) + ctx.register_parquet("t", path) + + assert ctx.tables() == {"t"} + + # count + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() + + expected = pa.array([7], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] + assert result == expected + + # where + expected = pa.array([2], pa.uint64()) + expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] + result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a > 10").collect() + assert result == expected + + # group by + results = ctx.sql( + "SELECT CAST(a as int) AS a, COUNT(a) AS cnt FROM t GROUP BY a" + ).collect() + + # group by returns batches + result_keys = [] + result_values = [] + for result in results: + pydict = result.to_pydict() + result_keys.extend(pydict["a"]) + result_values.extend(pydict["cnt"]) + + result_keys, result_values = ( + list(t) for t in zip(*sorted(zip(result_keys, result_values))) + ) + + assert result_keys == [1, 2, 3, 11, 12] + assert result_values == [2, 2, 1, 1, 1] + + # order by + result = ctx.sql( + "SELECT a, CAST(a AS int) AS a_int FROM t ORDER BY a DESC LIMIT 2" + ).collect() + expected_a = pa.array([50.0219, 50.0152], pa.float64()) + expected_cast = pa.array([50, 50], pa.int32()) + expected = [ + pa.RecordBatch.from_arrays([expected_a, expected_cast], ["a", "a_int"]) + ] + np.testing.assert_equal(expected[0].column(1), expected[0].column(1)) + + +def test_cast(ctx, tmp_path): + """ + Verify that we can cast + """ + path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) + ctx.register_parquet("t", path) + + valid_types = [ + "smallint", + "int", + "bigint", + "float(32)", + "float(64)", + "float", + ] + + select 
= ", ".join( + [f"CAST(9 AS {t}) AS A{i}" for i, t in enumerate(valid_types)] + ) + + # can execute, which implies that we can cast + ctx.sql(f"SELECT {select} FROM t").collect() + + +@pytest.mark.parametrize( + ("fn", "input_types", "output_type", "input_values", "expected_values"), + [ + ( + lambda x: x, + [pa.float64()], + pa.float64(), + [-1.2, None, 1.2], + [-1.2, None, 1.2], + ), + ( + lambda x: x.is_null(), + [pa.float64()], + pa.bool_(), + [-1.2, None, 1.2], + [False, True, False], + ), + ], +) +def test_udf( + ctx, tmp_path, fn, input_types, output_type, input_values, expected_values +): + # write to disk + path = helpers.write_parquet( + tmp_path / "a.parquet", pa.array(input_values) + ) + ctx.register_parquet("t", path) + + func = udf( + fn, input_types, output_type, name="func", volatility="immutable" + ) + ctx.register_udf(func) + + batches = ctx.sql("SELECT func(a) AS tt FROM t").collect() + result = batches[0].column(0) + + assert result == pa.array(expected_values) + + +_null_mask = np.array([False, True, False]) + + +@pytest.mark.parametrize( + "arr", + [ + pa.array(["a", "b", "c"], pa.utf8(), _null_mask), + pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), + pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), + pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), + pa.array([False, True, True], None, _null_mask), + pa.array([0, 1, 2], None), + helpers.data_binary_other(), + helpers.data_date32(), + helpers.data_with_nans(), + # C data interface missing + pytest.param( + pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), + marks=pytest.mark.xfail, + ), + pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), + # Not writtable to parquet + pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), + pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), + ], +) +def test_simple_select(ctx, tmp_path, arr): + path = helpers.write_parquet(tmp_path / "a.parquet", arr) + ctx.register_parquet("t", path) + + batches = ctx.sql("SELECT a AS tt FROM t").collect() + result = batches[0].column(0) + + np.testing.assert_equal(result, arr) diff --git a/python/datafusion/tests/test_udaf.py b/python/datafusion/tests/test_udaf.py new file mode 100644 index 000000000..2f286ba10 --- /dev/null +++ b/python/datafusion/tests/test_udaf.py @@ -0,0 +1,135 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +from typing import List + +import pyarrow as pa +import pyarrow.compute as pc +import pytest + +from datafusion import Accumulator, ExecutionContext, column, udaf + + +class Summarize(Accumulator): + """ + Interface of a user-defined accumulation. + """ + + def __init__(self): + self._sum = pa.scalar(0.0) + + def state(self) -> List[pa.Scalar]: + return [self._sum] + + def update(self, values: pa.Array) -> None: + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) + + def merge(self, states: pa.Array) -> None: + # Not nice since pyarrow scalars can't be summed yet. + # This breaks on `None` + self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py()) + + def evaluate(self) -> pa.Scalar: + return self._sum + + +class NotSubclassOfAccumulator: + pass + + +class MissingMethods(Accumulator): + def __init__(self): + self._sum = pa.scalar(0) + + def state(self) -> List[pa.Scalar]: + return [self._sum] + + +@pytest.fixture +def df(): + ctx = ExecutionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_errors(df): + with pytest.raises(TypeError): + udaf( + NotSubclassOfAccumulator, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + accum = udaf( + MissingMethods, + pa.int64(), + pa.int64(), + [pa.int64()], + volatility="immutable", + ) + df = df.aggregate([], [accum(column("a"))]) + + msg = ( + "Can't instantiate abstract class MissingMethods with abstract " + "methods evaluate, merge, update" + ) + with pytest.raises(Exception, match=msg): + df.collect() + + +def test_aggregate(df): + summarize = udaf( + Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + df = df.aggregate([], [summarize(column("a"))]) + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) + + +def test_group_by(df): + summarize = udaf( + Summarize, + pa.float64(), + pa.float64(), + [pa.float64()], + volatility="immutable", + ) + + df = df.aggregate([column("b")], [summarize(column("a"))]) + + batches = df.collect() + + arrays = [batch.column(1) for batch in batches] + joined = pa.concat_arrays(arrays) + assert joined == pa.array([1.0 + 2.0, 3.0]) diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 000000000..c6ee36349 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +[build-system] +requires = ["maturin>=0.11,<0.12"] +build-backend = "maturin" + +[project] +name = "datafusion" +description = "Build and run queries against data" +readme = "README.md" +license = {file = "LICENSE.txt"} +requires-python = ">=3.6" +keywords = ["datafusion", "dataframe", "rust", "query-engine"] +classifier = [ + "Development Status :: 2 - Pre-Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved", + "Operating System :: MacOS", + "Operating System :: Microsoft :: Windows", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python", + "Programming Language :: Rust", +] +dependencies = [ + "pyarrow>=1", +] + +[project.urls] +documentation = "https://arrow.apache.org/datafusion/python" +repository = "https://github.com/apache/arrow-datafusion" + +[tool.isort] +profile = "black" diff --git a/python/requirements-37.txt b/python/requirements-37.txt new file mode 100644 index 000000000..e64bebf32 --- /dev/null +++ b/python/requirements-37.txt @@ -0,0 +1,329 @@ +# +# This file is autogenerated by pip-compile with python 3.7 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +black==21.9b0 \ + --hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ + --hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 + # via -r requirements.in +click==8.0.3 \ + --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ + --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +importlib-metadata==4.2.0 \ + --hash=sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b \ + --hash=sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31 + # via + # click + # flake8 + # pluggy + # pytest +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.9.3 \ + --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ + --hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 + # via -r requirements.in +maturin==0.11.5 \ + --hash=sha256:07074778b063a439fdfd5501bd1d1823a216ec5b657d3ecde78fd7f2c4782422 \ + --hash=sha256:1ce666c386ff9c3c2b5d7d3ca4b1f9f675c38d7540ffbda0d5d5bc7d6ddde49a \ + --hash=sha256:20f9c30701c9932ed8026ceaf896fc77ecc76cebd6a182668dbc10ed597f8789 \ + --hash=sha256:3354d030b88c938a33bf407a6c0f79ccdd2cce3e1e3e4a2d0c92dc2e063adc6e \ + --hash=sha256:4191b0b7362b3025096faf126ff15cb682fbff324ac4a6ca18d55bb16e2b759b \ + --hash=sha256:70381be1585cb9fa5c02b83af80ae661aaad959e8aa0fddcfe195b004054bd69 \ + --hash=sha256:7bf96e7586bfdb5b0fadc6d662534b8a41123b33dff084fa383a81ded0ce5334 \ + 
--hash=sha256:ab2b3ccf66f5e0f9c3904d215835337b1bd305e79e3bf53b65bbc80a5755e01b \ + --hash=sha256:b0ac45879a7d624b47d72b093ae3370270894c19779f42aad7568a92951c5d47 \ + --hash=sha256:c2ded8b4ef9210d627bb966bc67661b7db259535f6062afe1ce5605406b50f3f \ + --hash=sha256:d78f24561a5e02f7d119b348b26e5772ad5698a43ca49e8facb9ce77cf273714 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.910 \ + --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ + --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ + --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ + --hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ + --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ + --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ + --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ + --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ + --hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ + --hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ + --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ + --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ + --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ + --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ + --hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ + --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ + --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ + --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ + --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ + --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ + --hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ + --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ + --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.21.3 \ + --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ + --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ + --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ + --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ + --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ + --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ + --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ + --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ + --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ + 
--hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ + --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ + --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ + --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ + --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ + --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ + --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ + --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ + --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ + --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ + --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ + --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ + --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ + --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ + --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ + --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ + --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ + --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ + --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ + --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ + --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ + --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ + --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ + --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd + # via + # -r requirements.in + # pandas + # pyarrow +packaging==21.0 \ + --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ + --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 + # via pytest +pandas==1.3.4 \ + --hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \ + --hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \ + --hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \ + --hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \ + --hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \ + --hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \ + --hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \ + --hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \ + --hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \ + --hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc \ + --hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \ + --hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \ + --hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \ + --hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \ + --hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \ + 
--hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \ + --hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \ + --hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \ + --hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \ + --hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \ + --hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec + # via -r requirements.in +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.4.0 \ + --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ + --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest +pyarrow==6.0.0 \ + --hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ + --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ + --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ + --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ + --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ + --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ + --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ + --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ + --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ + --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ + --hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ + --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ + --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ + --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ + --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ + --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ + --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ + --hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ + --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ + --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ + --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ + --hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ + --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ + --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ + --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ + --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ + 
--hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ + --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ + --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ + --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ + --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ + --hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ + --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ + --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ + --hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ + --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.3 \ + --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ + --hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 + # via packaging +pytest==6.2.5 \ + --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ + --hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 + # via -r requirements.in +python-dateutil==2.8.2 \ + --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ + --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 + # via pandas +pytz==2021.3 \ + --hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \ + --hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326 + # via pandas +regex==2021.10.23 \ + --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ + --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ + --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ + --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ + --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ + --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ + --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ + --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ + --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ + --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ + --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ + --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ + --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ + --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ + --hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ + --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ + --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ + 
--hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ + --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ + --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ + --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ + --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ + --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ + --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ + --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ + --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ + --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ + --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ + --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ + --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ + --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ + --hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ + --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ + --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ + --hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ + --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c + # via black +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via python-dateutil +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via + # -r requirements.in + # maturin + # mypy + # pytest +tomli==1.2.2 \ + --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ + --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade + # via black +typed-ast==1.4.3 \ + --hash=sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace \ + --hash=sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff \ + --hash=sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266 \ + --hash=sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528 \ + --hash=sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6 \ + --hash=sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808 \ + --hash=sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4 \ + --hash=sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363 \ + --hash=sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341 \ + --hash=sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04 \ + --hash=sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41 \ + --hash=sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e \ + --hash=sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3 \ + --hash=sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899 \ + --hash=sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805 \ + 
--hash=sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c \ + --hash=sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c \ + --hash=sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39 \ + --hash=sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a \ + --hash=sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3 \ + --hash=sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7 \ + --hash=sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f \ + --hash=sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075 \ + --hash=sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0 \ + --hash=sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40 \ + --hash=sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428 \ + --hash=sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927 \ + --hash=sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3 \ + --hash=sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f \ + --hash=sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65 + # via + # black + # mypy +typing-extensions==3.10.0.2 \ + --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ + --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ + --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 + # via + # black + # importlib-metadata + # mypy +zipp==3.6.0 \ + --hash=sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832 \ + --hash=sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc + # via importlib-metadata diff --git a/python/requirements.in b/python/requirements.in new file mode 100644 index 000000000..7e54705fc --- /dev/null +++ b/python/requirements.in @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +black +flake8 +isort +maturin +mypy +numpy +pandas +pyarrow +pytest +toml diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 000000000..358578ecb --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,282 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.2.0 \ + --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ + --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb + # via pytest +black==21.9b0 \ + --hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ + --hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 + # via -r requirements.in +click==8.0.3 \ + --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ + --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.9.3 \ + --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ + --hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 + # via -r requirements.in +maturin==0.11.5 \ + --hash=sha256:07074778b063a439fdfd5501bd1d1823a216ec5b657d3ecde78fd7f2c4782422 \ + --hash=sha256:1ce666c386ff9c3c2b5d7d3ca4b1f9f675c38d7540ffbda0d5d5bc7d6ddde49a \ + --hash=sha256:20f9c30701c9932ed8026ceaf896fc77ecc76cebd6a182668dbc10ed597f8789 \ + --hash=sha256:3354d030b88c938a33bf407a6c0f79ccdd2cce3e1e3e4a2d0c92dc2e063adc6e \ + --hash=sha256:4191b0b7362b3025096faf126ff15cb682fbff324ac4a6ca18d55bb16e2b759b \ + --hash=sha256:70381be1585cb9fa5c02b83af80ae661aaad959e8aa0fddcfe195b004054bd69 \ + --hash=sha256:7bf96e7586bfdb5b0fadc6d662534b8a41123b33dff084fa383a81ded0ce5334 \ + --hash=sha256:ab2b3ccf66f5e0f9c3904d215835337b1bd305e79e3bf53b65bbc80a5755e01b \ + --hash=sha256:b0ac45879a7d624b47d72b093ae3370270894c19779f42aad7568a92951c5d47 \ + --hash=sha256:c2ded8b4ef9210d627bb966bc67661b7db259535f6062afe1ce5605406b50f3f \ + --hash=sha256:d78f24561a5e02f7d119b348b26e5772ad5698a43ca49e8facb9ce77cf273714 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.910 \ + --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ + --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ + --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ + --hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ + --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ + --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ + --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ + --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ + --hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ + 
--hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ + --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ + --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ + --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ + --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ + --hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ + --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ + --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ + --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ + --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ + --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ + --hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ + --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ + --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.21.3 \ + --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ + --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ + --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ + --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ + --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ + --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ + --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ + --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ + --hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ + --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ + --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ + --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ + --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ + --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ + --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ + --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ + --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ + --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ + --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ + --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ + --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ + --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ + --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ + --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ + --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ + 
--hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ + --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ + --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ + --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ + --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ + --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ + --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ + --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd + # via + # -r requirements.in + # pandas + # pyarrow +packaging==21.0 \ + --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ + --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 + # via pytest +pandas==1.3.4 \ + --hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \ + --hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \ + --hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \ + --hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \ + --hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \ + --hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \ + --hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \ + --hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \ + --hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \ + --hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc \ + --hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \ + --hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \ + --hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \ + --hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \ + --hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \ + --hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \ + --hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \ + --hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \ + --hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \ + --hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \ + --hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec + # via -r requirements.in +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.4.0 \ + --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ + --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.10.0 \ + --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ + --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a + # via pytest +pyarrow==6.0.0 \ + 
--hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ + --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ + --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ + --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ + --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ + --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ + --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ + --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ + --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ + --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ + --hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ + --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ + --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ + --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ + --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ + --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ + --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ + --hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ + --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ + --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ + --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ + --hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ + --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ + --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ + --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ + --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ + --hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ + --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ + --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ + --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ + --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ + --hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ + --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ + --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ + --hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ + --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.3 \ + --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ + 
--hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 + # via packaging +pytest==6.2.5 \ + --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ + --hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 + # via -r requirements.in +python-dateutil==2.8.2 \ + --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ + --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 + # via pandas +pytz==2021.3 \ + --hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \ + --hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326 + # via pandas +regex==2021.10.23 \ + --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ + --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ + --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ + --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ + --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ + --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ + --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ + --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ + --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ + --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ + --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ + --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ + --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ + --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ + --hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ + --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ + --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ + --hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ + --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ + --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ + --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ + --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ + --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ + --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ + --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ + --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ + --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ + --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ + --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ + --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ + --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ + --hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ + --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ + --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ + 
--hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ + --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c + # via black +six==1.16.0 \ + --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ + --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 + # via python-dateutil +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via + # -r requirements.in + # maturin + # mypy + # pytest +tomli==1.2.2 \ + --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ + --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade + # via black +typing-extensions==3.10.0.2 \ + --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ + --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ + --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 + # via + # black + # mypy diff --git a/python/rust-toolchain b/python/rust-toolchain new file mode 100644 index 000000000..12b27c03a --- /dev/null +++ b/python/rust-toolchain @@ -0,0 +1 @@ +nightly-2021-10-23 diff --git a/python/src/catalog.rs b/python/src/catalog.rs new file mode 100644 index 000000000..f93c795ec --- /dev/null +++ b/python/src/catalog.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
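+
+// A minimal usage sketch from the Python side, assuming the classes below are
+// re-exported by the high-level python package (variable names and the table
+// name are illustrative only):
+//
+//     catalog = ctx.catalog()               # Catalog, default name "datafusion"
+//     database = catalog.database()         # Database, default name "public"
+//     table = database.table("my_table")    # Table
+//     table.schema                          # pyarrow schema of the table
+//     table.kind                            # "physical", "view" or "temporary"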
+
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use pyo3::exceptions::PyKeyError;
+use pyo3::prelude::*;
+
+use datafusion::{
+    arrow::pyarrow::PyArrowConvert,
+    catalog::{catalog::CatalogProvider, schema::SchemaProvider},
+    datasource::{TableProvider, TableType},
+};
+
+#[pyclass(name = "Catalog", module = "datafusion", subclass)]
+pub(crate) struct PyCatalog {
+    catalog: Arc<dyn CatalogProvider>,
+}
+
+#[pyclass(name = "Database", module = "datafusion", subclass)]
+pub(crate) struct PyDatabase {
+    database: Arc<dyn SchemaProvider>,
+}
+
+#[pyclass(name = "Table", module = "datafusion", subclass)]
+pub(crate) struct PyTable {
+    table: Arc<dyn TableProvider>,
+}
+
+impl PyCatalog {
+    pub fn new(catalog: Arc<dyn CatalogProvider>) -> Self {
+        Self { catalog }
+    }
+}
+
+impl PyDatabase {
+    pub fn new(database: Arc<dyn SchemaProvider>) -> Self {
+        Self { database }
+    }
+}
+
+impl PyTable {
+    pub fn new(table: Arc<dyn TableProvider>) -> Self {
+        Self { table }
+    }
+}
+
+#[pymethods]
+impl PyCatalog {
+    fn names(&self) -> Vec<String> {
+        self.catalog.schema_names()
+    }
+
+    #[args(name = "\"public\"")]
+    fn database(&self, name: &str) -> PyResult<PyDatabase> {
+        match self.catalog.schema(name) {
+            Some(database) => Ok(PyDatabase::new(database)),
+            None => Err(PyKeyError::new_err(format!(
+                "Database with name {} doesn't exist.",
+                name
+            ))),
+        }
+    }
+}
+
+#[pymethods]
+impl PyDatabase {
+    fn names(&self) -> HashSet<String> {
+        self.database.table_names().into_iter().collect()
+    }
+
+    fn table(&self, name: &str) -> PyResult<PyTable> {
+        match self.database.table(name) {
+            Some(table) => Ok(PyTable::new(table)),
+            None => Err(PyKeyError::new_err(format!(
+                "Table with name {} doesn't exist.",
+                name
+            ))),
+        }
+    }
+
+    // register_table
+    // deregister_table
+}
+
+#[pymethods]
+impl PyTable {
+    /// Get a reference to the schema for this table
+    #[getter]
+    fn schema(&self, py: Python) -> PyResult<PyObject> {
+        self.table.schema().to_pyarrow(py)
+    }
+
+    /// Get the type of this table for metadata/catalog purposes.
+    #[getter]
+    fn kind(&self) -> &str {
+        match self.table.table_type() {
+            TableType::Base => "physical",
+            TableType::View => "view",
+            TableType::Temporary => "temporary",
+        }
+    }
+
+    // fn scan
+    // fn statistics
+    // fn has_exact_statistics
+    // fn supports_filter_pushdown
+}
diff --git a/python/src/context.rs b/python/src/context.rs
new file mode 100644
index 000000000..7f386bac3
--- /dev/null
+++ b/python/src/context.rs
@@ -0,0 +1,173 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
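+
+// A minimal sketch of how this context is meant to be driven from Python,
+// assuming the high-level package re-exports ExecutionContext (the file and
+// table names are illustrative only):
+//
+//     ctx = ExecutionContext()
+//     ctx.register_csv("taxi", "taxi.csv")
+//     df = ctx.sql("SELECT COUNT(*) FROM taxi")
+//     batches = df.collect()                # list of pyarrow record batches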
+ +use std::path::PathBuf; +use std::{collections::HashSet, sync::Arc}; + +use uuid::Uuid; + +use pyo3::exceptions::{PyKeyError, PyValueError}; +use pyo3::prelude::*; + +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::record_batch::RecordBatch; +use datafusion::datasource::MemTable; +use datafusion::execution::context::ExecutionContext; +use datafusion::prelude::CsvReadOptions; + +use crate::catalog::PyCatalog; +use crate::dataframe::PyDataFrame; +use crate::errors::DataFusionError; +use crate::udf::PyScalarUDF; +use crate::utils::wait_for_future; + +/// `PyExecutionContext` is able to plan and execute DataFusion plans. +/// It has a powerful optimizer, a physical planner for local execution, and a +/// multi-threaded execution engine to perform the execution. +#[pyclass(name = "ExecutionContext", module = "datafusion", subclass, unsendable)] +pub(crate) struct PyExecutionContext { + ctx: ExecutionContext, +} + +#[pymethods] +impl PyExecutionContext { + // TODO(kszucs): should expose the configuration options as keyword arguments + #[new] + fn new() -> Self { + PyExecutionContext { + ctx: ExecutionContext::new(), + } + } + + /// Returns a PyDataFrame whose plan corresponds to the SQL statement. + fn sql(&mut self, query: &str, py: Python) -> PyResult { + let result = self.ctx.sql(query); + let df = wait_for_future(py, result).map_err(DataFusionError::from)?; + Ok(PyDataFrame::new(df)) + } + + fn create_dataframe( + &mut self, + partitions: Vec>, + ) -> PyResult { + let table = MemTable::try_new(partitions[0][0].schema(), partitions) + .map_err(DataFusionError::from)?; + + // generate a random (unique) name for this table + // table name cannot start with numeric digit + let name = "c".to_owned() + + &Uuid::new_v4() + .to_simple() + .encode_lower(&mut Uuid::encode_buffer()); + + self.ctx + .register_table(&*name, Arc::new(table)) + .map_err(DataFusionError::from)?; + let table = self.ctx.table(&*name).map_err(DataFusionError::from)?; + + let df = PyDataFrame::new(table); + Ok(df) + } + + fn register_record_batches( + &mut self, + name: &str, + partitions: Vec>, + ) -> PyResult<()> { + let schema = partitions[0][0].schema(); + let table = MemTable::try_new(schema, partitions)?; + self.ctx + .register_table(name, Arc::new(table)) + .map_err(DataFusionError::from)?; + Ok(()) + } + + fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let result = self.ctx.register_parquet(name, path); + wait_for_future(py, result).map_err(DataFusionError::from)?; + Ok(()) + } + + #[args( + schema = "None", + has_header = "true", + delimiter = "\",\"", + schema_infer_max_records = "1000", + file_extension = "\".csv\"" + )] + fn register_csv( + &mut self, + name: &str, + path: PathBuf, + schema: Option, + has_header: bool, + delimiter: &str, + schema_infer_max_records: usize, + file_extension: &str, + py: Python, + ) -> PyResult<()> { + let path = path + .to_str() + .ok_or(PyValueError::new_err("Unable to convert path to a string"))?; + let delimiter = delimiter.as_bytes(); + if delimiter.len() != 1 { + return Err(PyValueError::new_err( + "Delimiter must be a single character", + )); + } + + let mut options = CsvReadOptions::new() + .has_header(has_header) + .delimiter(delimiter[0]) + .schema_infer_max_records(schema_infer_max_records) + .file_extension(file_extension); + options.schema = schema.as_ref(); + + let result = self.ctx.register_csv(name, path, options); + wait_for_future(py, result).map_err(DataFusionError::from)?; + + Ok(()) + } + + fn 
register_udf(&mut self, udf: PyScalarUDF) -> PyResult<()> { + self.ctx.register_udf(udf.function); + Ok(()) + } + + #[args(name = "\"datafusion\"")] + fn catalog(&self, name: &str) -> PyResult { + match self.ctx.catalog(name) { + Some(catalog) => Ok(PyCatalog::new(catalog)), + None => Err(PyKeyError::new_err(format!( + "Catalog with name {} doesn't exist.", + &name + ))), + } + } + + fn tables(&self) -> HashSet { + self.ctx.tables().unwrap() + } + + fn table(&self, name: &str) -> PyResult { + Ok(PyDataFrame::new(self.ctx.table(name)?)) + } + + fn empty_table(&self) -> PyResult { + Ok(PyDataFrame::new(self.ctx.read_empty()?)) + } +} diff --git a/python/src/dataframe.rs b/python/src/dataframe.rs new file mode 100644 index 000000000..9050df92e --- /dev/null +++ b/python/src/dataframe.rs @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use pyo3::prelude::*; + +use datafusion::arrow::datatypes::Schema; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::arrow::util::pretty; +use datafusion::dataframe::DataFrame; +use datafusion::logical_plan::JoinType; + +use crate::utils::wait_for_future; +use crate::{errors::DataFusionError, expression::PyExpr}; + +/// A PyDataFrame is a representation of a logical plan and an API to compose statements. +/// Use it to build a plan and `.collect()` to execute the plan and collect the result. +/// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. 
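+// A minimal sketch of the intended Python-side flow, assuming the classes
+// registered in lib.rs are re-exported as ExecutionContext, DataFrame and
+// Expression by the high-level package (`batch` stands for any pyarrow
+// RecordBatch):
+//
+//     df = ctx.create_dataframe([[batch]])
+//     df = df.select(Expression.column("a"), Expression.column("b"))
+//     df = df.sort(Expression.column("a").sort(ascending=False))
+//     batches = df.limit(10).collect()      # list of pyarrow record batches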
+#[pyclass(name = "DataFrame", module = "datafusion", subclass)] +#[derive(Clone)] +pub(crate) struct PyDataFrame { + df: Arc, +} + +impl PyDataFrame { + /// creates a new PyDataFrame + pub fn new(df: Arc) -> Self { + Self { df } + } +} + +#[pymethods] +impl PyDataFrame { + /// Returns the schema from the logical plan + fn schema(&self) -> Schema { + self.df.schema().into() + } + + #[args(args = "*")] + fn select(&self, args: Vec) -> PyResult { + let expr = args.into_iter().map(|e| e.into()).collect(); + let df = self.df.select(expr)?; + Ok(Self::new(df)) + } + + fn filter(&self, predicate: PyExpr) -> PyResult { + let df = self.df.filter(predicate.into())?; + Ok(Self::new(df)) + } + + fn aggregate(&self, group_by: Vec, aggs: Vec) -> PyResult { + let group_by = group_by.into_iter().map(|e| e.into()).collect(); + let aggs = aggs.into_iter().map(|e| e.into()).collect(); + let df = self.df.aggregate(group_by, aggs)?; + Ok(Self::new(df)) + } + + #[args(exprs = "*")] + fn sort(&self, exprs: Vec) -> PyResult { + let exprs = exprs.into_iter().map(|e| e.into()).collect(); + let df = self.df.sort(exprs)?; + Ok(Self::new(df)) + } + + fn limit(&self, count: usize) -> PyResult { + let df = self.df.limit(count)?; + Ok(Self::new(df)) + } + + /// Executes the plan, returning a list of `RecordBatch`es. + /// Unless some order is specified in the plan, there is no + /// guarantee of the order of the result. + fn collect(&self, py: Python) -> PyResult> { + let batches = wait_for_future(py, self.df.collect())?; + // cannot use PyResult> return type due to + // https://github.com/PyO3/pyo3/issues/1813 + batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect() + } + + /// Print the result, 20 lines by default + #[args(num = "20")] + fn show(&self, py: Python, num: usize) -> PyResult<()> { + let df = self.df.limit(num)?; + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) + } + + fn join( + &self, + right: PyDataFrame, + join_keys: (Vec<&str>, Vec<&str>), + how: &str, + ) -> PyResult { + let join_type = match how { + "inner" => JoinType::Inner, + "left" => JoinType::Left, + "right" => JoinType::Right, + "full" => JoinType::Full, + "semi" => JoinType::Semi, + "anti" => JoinType::Anti, + how => { + return Err(DataFusionError::Common(format!( + "The join type {} does not exist or is not implemented", + how + )) + .into()) + } + }; + + let df = self + .df + .join(right.df, join_type, &join_keys.0, &join_keys.1)?; + Ok(Self::new(df)) + } +} diff --git a/python/src/errors.rs b/python/src/errors.rs new file mode 100644 index 000000000..655ed8441 --- /dev/null +++ b/python/src/errors.rs @@ -0,0 +1,57 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+use core::fmt;
+
+use datafusion::arrow::error::ArrowError;
+use datafusion::error::DataFusionError as InnerDataFusionError;
+use pyo3::{exceptions::PyException, PyErr};
+
+#[derive(Debug)]
+pub enum DataFusionError {
+    ExecutionError(InnerDataFusionError),
+    ArrowError(ArrowError),
+    Common(String),
+}
+
+impl fmt::Display for DataFusionError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e),
+            DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e),
+            DataFusionError::Common(e) => write!(f, "{}", e),
+        }
+    }
+}
+
+impl From<ArrowError> for DataFusionError {
+    fn from(err: ArrowError) -> DataFusionError {
+        DataFusionError::ArrowError(err)
+    }
+}
+
+impl From<InnerDataFusionError> for DataFusionError {
+    fn from(err: InnerDataFusionError) -> DataFusionError {
+        DataFusionError::ExecutionError(err)
+    }
+}
+
+impl From<DataFusionError> for PyErr {
+    fn from(err: DataFusionError) -> PyErr {
+        PyException::new_err(err.to_string())
+    }
+}
diff --git a/python/src/expression.rs b/python/src/expression.rs
new file mode 100644
index 000000000..d646d6b58
--- /dev/null
+++ b/python/src/expression.rs
@@ -0,0 +1,147 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
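+
+// The operator overloads implemented below surface as normal Python operators;
+// a small sketch (only "Expression", its methods and the overloaded operators
+// come from this file, the column names are illustrative):
+//
+//     a = Expression.column("a")
+//     b = Expression.column("b")
+//     projected = ((a + b) * a).alias("c")   # __add__ / __mul__ / alias
+//     predicate = a > b                      # __richcmp__ with CompareOp::Gt
+//     field = Expression.column("s")["f"]    # __getitem__ -> GetIndexedField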
+ +use pyo3::PyMappingProtocol; +use pyo3::{basic::CompareOp, prelude::*, PyNumberProtocol, PyObjectProtocol}; +use std::convert::{From, Into}; + +use datafusion::arrow::datatypes::DataType; +use datafusion::logical_plan::{col, lit, Expr}; + +use datafusion::scalar::ScalarValue; + +/// An PyExpr that can be used on a DataFrame +#[pyclass(name = "Expression", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub(crate) struct PyExpr { + pub(crate) expr: Expr, +} + +impl From for Expr { + fn from(expr: PyExpr) -> Expr { + expr.expr + } +} + +impl Into for Expr { + fn into(self) -> PyExpr { + PyExpr { expr: self } + } +} + +#[pyproto] +impl PyNumberProtocol for PyExpr { + fn __add__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok((lhs.expr + rhs.expr).into()) + } + + fn __sub__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok((lhs.expr - rhs.expr).into()) + } + + fn __truediv__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok((lhs.expr / rhs.expr).into()) + } + + fn __mul__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok((lhs.expr * rhs.expr).into()) + } + + fn __mod__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok(lhs.expr.clone().modulus(rhs.expr).into()) + } + + fn __and__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok(lhs.expr.clone().and(rhs.expr).into()) + } + + fn __or__(lhs: PyExpr, rhs: PyExpr) -> PyResult { + Ok(lhs.expr.clone().or(rhs.expr).into()) + } + + fn __invert__(&self) -> PyResult { + Ok(self.expr.clone().not().into()) + } +} + +#[pyproto] +impl PyObjectProtocol for PyExpr { + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { + let expr = match op { + CompareOp::Lt => self.expr.clone().lt(other.expr), + CompareOp::Le => self.expr.clone().lt_eq(other.expr), + CompareOp::Eq => self.expr.clone().eq(other.expr), + CompareOp::Ne => self.expr.clone().not_eq(other.expr), + CompareOp::Gt => self.expr.clone().gt(other.expr), + CompareOp::Ge => self.expr.clone().gt_eq(other.expr), + }; + expr.into() + } + + fn __str__(&self) -> PyResult { + Ok(format!("{}", self.expr)) + } +} + +#[pymethods] +impl PyExpr { + #[staticmethod] + pub fn literal(value: ScalarValue) -> PyExpr { + lit(value).into() + } + + #[staticmethod] + pub fn column(value: &str) -> PyExpr { + col(value).into() + } + + /// assign a name to the PyExpr + pub fn alias(&self, name: &str) -> PyExpr { + self.expr.clone().alias(name).into() + } + + /// Create a sort PyExpr from an existing PyExpr. + #[args(ascending = true, nulls_first = true)] + pub fn sort(&self, ascending: bool, nulls_first: bool) -> PyExpr { + self.expr.clone().sort(ascending, nulls_first).into() + } + + pub fn is_null(&self) -> PyExpr { + self.expr.clone().is_null().into() + } + + pub fn cast(&self, to: DataType) -> PyExpr { + // self.expr.cast_to() requires DFSchema to validate that the cast + // is supported, omit that for now + let expr = Expr::Cast { + expr: Box::new(self.expr.clone()), + data_type: to, + }; + expr.into() + } +} + +#[pyproto] +impl PyMappingProtocol for PyExpr { + fn __getitem__(&self, key: &str) -> PyResult { + Ok(Expr::GetIndexedField { + expr: Box::new(self.expr.clone()), + key: ScalarValue::Utf8(Some(key.to_string()).to_owned()), + } + .into()) + } +} diff --git a/python/src/functions.rs b/python/src/functions.rs new file mode 100644 index 000000000..c0b4e5989 --- /dev/null +++ b/python/src/functions.rs @@ -0,0 +1,343 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::{prelude::*, wrap_pyfunction}; + +use datafusion::logical_plan; + +use datafusion::physical_plan::{ + aggregates::AggregateFunction, functions::BuiltinScalarFunction, +}; + +use crate::errors; +use crate::expression::PyExpr; + +#[pyfunction] +fn array(value: Vec) -> PyExpr { + PyExpr { + expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::>()), + } +} + +#[pyfunction] +fn in_list(expr: PyExpr, value: Vec, negated: bool) -> PyExpr { + logical_plan::in_list( + expr.expr, + value.into_iter().map(|x| x.expr).collect::>(), + negated, + ) + .into() +} + +/// Current date and time +#[pyfunction] +fn now() -> PyExpr { + PyExpr { + // here lit(0) is a stub for conform to arity + expr: logical_plan::now(logical_plan::lit(0)), + } +} + +/// Returns a random value in the range 0.0 <= x < 1.0 +#[pyfunction] +fn random() -> PyExpr { + PyExpr { + expr: logical_plan::random(), + } +} + +/// Computes a binary hash of the given data. type is the algorithm to use. +/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3. +#[pyfunction(value, method)] +fn digest(value: PyExpr, method: PyExpr) -> PyExpr { + PyExpr { + expr: logical_plan::digest(value.expr, method.expr), + } +} + +/// Concatenates the text representations of all the arguments. +/// NULL arguments are ignored. +#[pyfunction(args = "*")] +fn concat(args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat(&args).into()) +} + +/// Concatenates all but the first argument, with separators. +/// The first argument is used as the separator string, and should not be NULL. +/// Other NULL arguments are ignored. 
+#[pyfunction(sep, args = "*")] +fn concat_ws(sep: String, args: Vec) -> PyResult { + let args = args.into_iter().map(|e| e.expr).collect::>(); + Ok(logical_plan::concat_ws(sep, &args).into()) +} + +/// Creates a new Sort expression +#[pyfunction] +fn order_by( + expr: PyExpr, + asc: Option, + nulls_first: Option, +) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Sort { + expr: Box::new(expr.expr), + asc: asc.unwrap_or(true), + nulls_first: nulls_first.unwrap_or(true), + }, + }) +} + +/// Creates a new Alias expression +#[pyfunction] +fn alias(expr: PyExpr, name: &str) -> PyResult { + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::Alias( + Box::new(expr.expr), + String::from(name), + ), + }) +} + +/// Creates a new Window function expression +#[pyfunction] +fn window( + name: &str, + args: Vec, + partition_by: Option>, + order_by: Option>, +) -> PyResult { + use std::str::FromStr; + let fun = datafusion::physical_plan::window_functions::WindowFunction::from_str(name) + .map_err(|e| -> errors::DataFusionError { e.into() })?; + Ok(PyExpr { + expr: datafusion::logical_plan::Expr::WindowFunction { + fun, + args: args.into_iter().map(|x| x.expr).collect::>(), + partition_by: partition_by + .unwrap_or(vec![]) + .into_iter() + .map(|x| x.expr) + .collect::>(), + order_by: order_by + .unwrap_or(vec![]) + .into_iter() + .map(|x| x.expr) + .collect::>(), + window_frame: None, + }, + }) +} + +macro_rules! scalar_function { + ($NAME: ident, $FUNC: ident) => { + scalar_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*")] + fn $NAME(args: Vec) -> PyExpr { + let expr = logical_plan::Expr::ScalarFunction { + fun: BuiltinScalarFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + }; + expr.into() + } + }; +} + +macro_rules! aggregate_function { + ($NAME: ident, $FUNC: ident) => { + aggregate_function!($NAME, $FUNC, stringify!($NAME)); + }; + ($NAME: ident, $FUNC: ident, $DOC: expr) => { + #[doc = $DOC] + #[pyfunction(args = "*", distinct = "false")] + fn $NAME(args: Vec, distinct: bool) -> PyExpr { + let expr = logical_plan::Expr::AggregateFunction { + fun: AggregateFunction::$FUNC, + args: args.into_iter().map(|e| e.into()).collect(), + distinct, + }; + expr.into() + } + }; +} + +scalar_function!(abs, Abs); +scalar_function!(acos, Acos); +scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character."); +scalar_function!(asin, Asin); +scalar_function!(atan, Atan); +scalar_function!( + bit_length, + BitLength, + "Returns number of bits in the string (8 times the octet_length)." +); +scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string."); +scalar_function!(ceil, Ceil); +scalar_function!( + character_length, + CharacterLength, + "Returns number of characters in the string." +); +scalar_function!(chr, Chr, "Returns the character with the given code."); +scalar_function!(cos, Cos); +scalar_function!(exp, Exp); +scalar_function!(floor, Floor); +scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. 
Words are sequences of alphanumeric characters separated by non-alphanumeric characters."); +scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters."); +scalar_function!(ln, Ln); +scalar_function!(log10, Log10); +scalar_function!(log2, Log2); +scalar_function!(lower, Lower, "Converts the string to all lower case"); +scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right)."); +scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string."); +scalar_function!( + md5, + MD5, + "Computes the MD5 hash of the argument, with the result written in hexadecimal." +); +scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces."); +scalar_function!(regexp_match, RegexpMatch); +scalar_function!( + regexp_replace, + RegexpReplace, + "Replaces substring(s) matching a POSIX regular expression" +); +scalar_function!( + repeat, + Repeat, + "Repeats string the specified number of times." +); +scalar_function!( + replace, + Replace, + "Replaces all occurrences in string of substring from with substring to." +); +scalar_function!( + reverse, + Reverse, + "Reverses the order of the characters in the string." +); +scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters."); +scalar_function!(round, Round); +scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated."); +scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string."); +scalar_function!(sha224, SHA224); +scalar_function!(sha256, SHA256); +scalar_function!(sha384, SHA384); +scalar_function!(sha512, SHA512); +scalar_function!(signum, Signum); +scalar_function!(sin, Sin); +scalar_function!(split_part, SplitPart, "Splits string at occurrences of delimiter and returns the n'th field (counting from one)."); +scalar_function!(sqrt, Sqrt); +scalar_function!( + starts_with, + StartsWith, + "Returns true if string starts with prefix." +); +scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)"); +scalar_function!(substr, Substr); +scalar_function!(tan, Tan); +scalar_function!( + to_hex, + ToHex, + "Converts the number to its equivalent hexadecimal representation." +); +scalar_function!(to_timestamp, ToTimestamp); +scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. 
If from is longer than to, occurrences of the extra characters in from are deleted."); +scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string."); +scalar_function!(trunc, Trunc); +scalar_function!(upper, Upper, "Converts the string to all upper case."); + +aggregate_function!(avg, Avg); +aggregate_function!(count, Count); +aggregate_function!(max, Max); +aggregate_function!(min, Min); +aggregate_function!(sum, Sum); +aggregate_function!(approx_distinct, ApproxDistinct); + +pub(crate) fn init_module(m: &PyModule) -> PyResult<()> { + m.add_wrapped(wrap_pyfunction!(abs))?; + m.add_wrapped(wrap_pyfunction!(acos))?; + m.add_wrapped(wrap_pyfunction!(approx_distinct))?; + m.add_wrapped(wrap_pyfunction!(alias))?; + m.add_wrapped(wrap_pyfunction!(array))?; + m.add_wrapped(wrap_pyfunction!(ascii))?; + m.add_wrapped(wrap_pyfunction!(asin))?; + m.add_wrapped(wrap_pyfunction!(atan))?; + m.add_wrapped(wrap_pyfunction!(avg))?; + m.add_wrapped(wrap_pyfunction!(bit_length))?; + m.add_wrapped(wrap_pyfunction!(btrim))?; + m.add_wrapped(wrap_pyfunction!(ceil))?; + m.add_wrapped(wrap_pyfunction!(character_length))?; + m.add_wrapped(wrap_pyfunction!(chr))?; + m.add_wrapped(wrap_pyfunction!(concat_ws))?; + m.add_wrapped(wrap_pyfunction!(concat))?; + m.add_wrapped(wrap_pyfunction!(cos))?; + m.add_wrapped(wrap_pyfunction!(count))?; + m.add_wrapped(wrap_pyfunction!(digest))?; + m.add_wrapped(wrap_pyfunction!(exp))?; + m.add_wrapped(wrap_pyfunction!(floor))?; + m.add_wrapped(wrap_pyfunction!(in_list))?; + m.add_wrapped(wrap_pyfunction!(initcap))?; + m.add_wrapped(wrap_pyfunction!(left))?; + m.add_wrapped(wrap_pyfunction!(ln))?; + m.add_wrapped(wrap_pyfunction!(log10))?; + m.add_wrapped(wrap_pyfunction!(log2))?; + m.add_wrapped(wrap_pyfunction!(lower))?; + m.add_wrapped(wrap_pyfunction!(lpad))?; + m.add_wrapped(wrap_pyfunction!(ltrim))?; + m.add_wrapped(wrap_pyfunction!(max))?; + m.add_wrapped(wrap_pyfunction!(md5))?; + m.add_wrapped(wrap_pyfunction!(min))?; + m.add_wrapped(wrap_pyfunction!(now))?; + m.add_wrapped(wrap_pyfunction!(octet_length))?; + m.add_wrapped(wrap_pyfunction!(order_by))?; + m.add_wrapped(wrap_pyfunction!(random))?; + m.add_wrapped(wrap_pyfunction!(regexp_match))?; + m.add_wrapped(wrap_pyfunction!(regexp_replace))?; + m.add_wrapped(wrap_pyfunction!(repeat))?; + m.add_wrapped(wrap_pyfunction!(replace))?; + m.add_wrapped(wrap_pyfunction!(reverse))?; + m.add_wrapped(wrap_pyfunction!(right))?; + m.add_wrapped(wrap_pyfunction!(round))?; + m.add_wrapped(wrap_pyfunction!(rpad))?; + m.add_wrapped(wrap_pyfunction!(rtrim))?; + m.add_wrapped(wrap_pyfunction!(sha224))?; + m.add_wrapped(wrap_pyfunction!(sha256))?; + m.add_wrapped(wrap_pyfunction!(sha384))?; + m.add_wrapped(wrap_pyfunction!(sha512))?; + m.add_wrapped(wrap_pyfunction!(signum))?; + m.add_wrapped(wrap_pyfunction!(sin))?; + m.add_wrapped(wrap_pyfunction!(split_part))?; + m.add_wrapped(wrap_pyfunction!(sqrt))?; + m.add_wrapped(wrap_pyfunction!(starts_with))?; + m.add_wrapped(wrap_pyfunction!(strpos))?; + m.add_wrapped(wrap_pyfunction!(substr))?; + m.add_wrapped(wrap_pyfunction!(sum))?; + m.add_wrapped(wrap_pyfunction!(tan))?; + m.add_wrapped(wrap_pyfunction!(to_hex))?; + m.add_wrapped(wrap_pyfunction!(to_timestamp))?; + m.add_wrapped(wrap_pyfunction!(translate))?; + m.add_wrapped(wrap_pyfunction!(trim))?; + m.add_wrapped(wrap_pyfunction!(trunc))?; + m.add_wrapped(wrap_pyfunction!(upper))?; + 
m.add_wrapped(wrap_pyfunction!(window))?;
+
+    Ok(())
+}
diff --git a/python/src/lib.rs b/python/src/lib.rs
new file mode 100644
index 000000000..d40bae251
--- /dev/null
+++ b/python/src/lib.rs
@@ -0,0 +1,52 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use pyo3::prelude::*;
+
+mod catalog;
+mod context;
+mod dataframe;
+mod errors;
+mod expression;
+mod functions;
+mod udaf;
+mod udf;
+mod utils;
+
+/// Low-level DataFusion internal package.
+///
+/// The higher-level public API is defined in pure python files under the
+/// datafusion directory.
+#[pymodule]
+fn _internal(py: Python, m: &PyModule) -> PyResult<()> {
+    // Register the python classes
+    m.add_class::<catalog::PyCatalog>()?;
+    m.add_class::<catalog::PyDatabase>()?;
+    m.add_class::<catalog::PyTable>()?;
+    m.add_class::<context::PyExecutionContext>()?;
+    m.add_class::<dataframe::PyDataFrame>()?;
+    m.add_class::<expression::PyExpr>()?;
+    m.add_class::<udf::PyScalarUDF>()?;
+    m.add_class::<udaf::PyAggregateUDF>()?;
+
+    // Register the functions as a submodule
+    let funcs = PyModule::new(py, "functions")?;
+    functions::init_module(funcs)?;
+    m.add_submodule(funcs)?;
+
+    Ok(())
+}
diff --git a/python/src/udaf.rs b/python/src/udaf.rs
new file mode 100644
index 000000000..1de6e6320
--- /dev/null
+++ b/python/src/udaf.rs
@@ -0,0 +1,153 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
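+
+// RustAccumulator below simply forwards to a Python object that must provide
+// state(), update(values), merge(states) and evaluate(). A minimal sketch of
+// such an accumulator class (only those four method names are dictated by this
+// file; the class itself and the pyarrow.compute usage are illustrative):
+//
+//     import pyarrow as pa
+//     import pyarrow.compute as pc
+//
+//     class Summer:
+//         def __init__(self):
+//             self._sum = pa.scalar(0.0)
+//
+//         def state(self):
+//             return [self._sum]
+//
+//         def update(self, values):
+//             self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py())
+//
+//         def merge(self, states):
+//             self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py())
+//
+//         def evaluate(self):
+//             return self._sum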
+ +use std::sync::Arc; + +use pyo3::{prelude::*, types::PyTuple}; + +use datafusion::arrow::array::ArrayRef; +use datafusion::arrow::datatypes::DataType; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::error::{DataFusionError, Result}; +use datafusion::logical_plan; +use datafusion::physical_plan::aggregates::AccumulatorFunctionImplementation; +use datafusion::physical_plan::udaf::AggregateUDF; +use datafusion::physical_plan::Accumulator; +use datafusion::scalar::ScalarValue; + +use crate::expression::PyExpr; +use crate::utils::parse_volatility; + +#[derive(Debug)] +struct RustAccumulator { + accum: PyObject, +} + +impl RustAccumulator { + fn new(accum: PyObject) -> Self { + Self { accum } + } +} + +impl Accumulator for RustAccumulator { + fn state(&self) -> Result> { + Python::with_gil(|py| self.accum.as_ref(py).call_method0("state")?.extract()) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + } + + fn update(&mut self, _values: &[ScalarValue]) -> Result<()> { + // no need to implement as datafusion does not use it + todo!() + } + + fn merge(&mut self, _states: &[ScalarValue]) -> Result<()> { + // no need to implement as datafusion does not use it + todo!() + } + + fn evaluate(&self) -> Result { + Python::with_gil(|py| self.accum.as_ref(py).call_method0("evaluate")?.extract()) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + } + + fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> { + Python::with_gil(|py| { + // 1. cast args to Pyarrow array + let py_args = values + .iter() + .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .collect::>(); + let py_args = PyTuple::new(py, py_args); + + // 2. call function + self.accum + .as_ref(py) + .call_method1("update", py_args) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + Ok(()) + }) + } + + fn merge_batch(&mut self, states: &[ArrayRef]) -> Result<()> { + Python::with_gil(|py| { + let state = &states[0]; + + // 1. cast states to Pyarrow array + let state = state + .to_pyarrow(py) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + // 2. 
call merge + self.accum + .as_ref(py) + .call_method1("merge", (state,)) + .map_err(|e| DataFusionError::Execution(format!("{}", e)))?; + + Ok(()) + }) + } +} + +pub fn to_rust_accumulator(accum: PyObject) -> AccumulatorFunctionImplementation { + Arc::new(move || -> Result> { + let accum = Python::with_gil(|py| { + accum + .call0(py) + .map_err(|e| DataFusionError::Execution(format!("{}", e))) + })?; + Ok(Box::new(RustAccumulator::new(accum))) + }) +} + +/// Represents a AggregateUDF +#[pyclass(name = "AggregateUDF", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyAggregateUDF { + pub(crate) function: AggregateUDF, +} + +#[pymethods] +impl PyAggregateUDF { + #[new(name, accumulator, input_type, return_type, state_type, volatility)] + fn new( + name: &str, + accumulator: PyObject, + input_type: DataType, + return_type: DataType, + state_type: Vec, + volatility: &str, + ) -> PyResult { + let function = logical_plan::create_udaf( + &name, + input_type, + Arc::new(return_type), + parse_volatility(volatility)?, + to_rust_accumulator(accumulator), + Arc::new(state_type), + ); + Ok(Self { function }) + } + + /// creates a new PyExpr with the call of the udf + #[call] + #[args(args = "*")] + fn __call__(&self, args: Vec) -> PyResult { + let args = args.iter().map(|e| e.expr.clone()).collect(); + Ok(self.function.call(args).into()) + } +} diff --git a/python/src/udf.rs b/python/src/udf.rs new file mode 100644 index 000000000..379c44987 --- /dev/null +++ b/python/src/udf.rs @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::sync::Arc; + +use pyo3::{prelude::*, types::PyTuple}; + +use datafusion::arrow::array::ArrayRef; +use datafusion::arrow::datatypes::DataType; +use datafusion::arrow::pyarrow::PyArrowConvert; +use datafusion::error::DataFusionError; +use datafusion::logical_plan; +use datafusion::physical_plan::functions::{ + make_scalar_function, ScalarFunctionImplementation, +}; +use datafusion::physical_plan::udf::ScalarUDF; + +use crate::expression::PyExpr; +use crate::utils::parse_volatility; + +/// Create a DataFusion's UDF implementation from a python function +/// that expects pyarrow arrays. This is more efficient as it performs +/// a zero-copy of the contents. +fn to_rust_function(func: PyObject) -> ScalarFunctionImplementation { + make_scalar_function( + move |args: &[ArrayRef]| -> Result { + Python::with_gil(|py| { + // 1. cast args to Pyarrow arrays + let py_args = args + .iter() + .map(|arg| arg.data().to_owned().to_pyarrow(py).unwrap()) + .collect::>(); + let py_args = PyTuple::new(py, py_args); + + // 2. 
call function + let value = func.as_ref(py).call(py_args, None); + let value = match value { + Ok(n) => Ok(n), + Err(error) => Err(DataFusionError::Execution(format!("{:?}", error))), + }?; + + // 3. cast to arrow::array::Array + let array = ArrayRef::from_pyarrow(value).unwrap(); + Ok(array) + }) + }, + ) +} + +/// Represents a PyScalarUDF +#[pyclass(name = "ScalarUDF", module = "datafusion", subclass)] +#[derive(Debug, Clone)] +pub struct PyScalarUDF { + pub(crate) function: ScalarUDF, +} + +#[pymethods] +impl PyScalarUDF { + #[new(name, func, input_types, return_type, volatility)] + fn new( + name: &str, + func: PyObject, + input_types: Vec, + return_type: DataType, + volatility: &str, + ) -> PyResult { + let function = logical_plan::create_udf( + name, + input_types, + Arc::new(return_type), + parse_volatility(volatility)?, + to_rust_function(func), + ); + Ok(Self { function }) + } + + /// creates a new PyExpr with the call of the udf + #[call] + #[args(args = "*")] + fn __call__(&self, args: Vec) -> PyResult { + let args = args.iter().map(|e| e.expr.clone()).collect(); + Ok(self.function.call(args).into()) + } +} diff --git a/python/src/utils.rs b/python/src/utils.rs new file mode 100644 index 000000000..c8e1c63b1 --- /dev/null +++ b/python/src/utils.rs @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
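+
+// The ScalarUDF wrapper in udf.rs above hands the registered Python callable
+// one pyarrow array per argument and expects a pyarrow array back. A minimal
+// sketch of such a callable (the function name and pyarrow.compute usage are
+// illustrative):
+//
+//     import pyarrow.compute as pc
+//
+//     def add_one(array):
+//         return pc.add(array, 1)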
diff --git a/python/src/utils.rs b/python/src/utils.rs new file mode 100644 index 000000000..c8e1c63b1 --- /dev/null +++ b/python/src/utils.rs @@ -0,0 +1,50 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::future::Future; + +use pyo3::prelude::*; +use tokio::runtime::Runtime; + +use datafusion::physical_plan::functions::Volatility; + +use crate::errors::DataFusionError; + +/// Utility to collect Rust futures with the GIL released +pub(crate) fn wait_for_future<F: Future>(py: Python, f: F) -> F::Output +where + F: Send, + F::Output: Send, +{ + let rt = Runtime::new().unwrap(); + py.allow_threads(|| rt.block_on(f)) +} + +pub(crate) fn parse_volatility(value: &str) -> Result<Volatility, DataFusionError> { + Ok(match value { + "immutable" => Volatility::Immutable, + "stable" => Volatility::Stable, + "volatile" => Volatility::Volatile, + value => { + return Err(DataFusionError::Common(format!( + "Unsupported volatility type: `{}`, supported \ + values are: immutable, stable and volatile.", + value + ))) + } + }) +} From d39227bba22b7b7e7a2c8a09b94f0a0d3bc05232 Mon Sep 17 00:00:00 2001 From: Remco Verhoef Date: Wed, 8 Jun 2022 20:09:50 +0200 Subject: [PATCH 4/5] merge ballista-python --- python/CHANGELOG.md | 133 ++-- python/Cargo.lock | 743 +++++++++++------- python/Cargo.toml | 19 +- python/README.md | 114 +-- python/{datafusion => ballista}/__init__.py | 21 +- python/{datafusion => ballista}/functions.py | 0 .../tests/__init__.py | 0 python/ballista/tests/conftest.py | 33 + .../{datafusion => ballista}/tests/generic.py | 0 .../tests/test_aggregation.py | 4 +- .../tests/test_catalog.py | 32 - .../tests/test_context.py | 27 +- .../tests/test_dataframe.py | 28 +- .../tests/test_functions.py | 6 +- .../tests/test_imports.py | 28 +- python/ballista/tests/test_indexing.py | 55 ++ .../tests/test_sql.py | 7 +- .../tests/test_udaf.py | 5 +- python/dev/create_license.py | 252 ++++++ python/pyproject.toml | 15 +- python/requirements-310.txt | 213 +++++ python/requirements-37.txt | 443 +++++------ python/requirements.in | 2 +- python/src/ballista_context.rs | 125 +++ python/src/dataframe.rs | 55 +- python/src/errors.rs | 38 + python/src/expression.rs | 96 +-- python/src/functions.rs | 29 +- python/src/lib.rs | 20 +- python/src/utils.rs | 24 +- 30 files changed, 1629 insertions(+), 938 deletions(-) rename python/{datafusion => ballista}/__init__.py (89%) rename python/{datafusion => ballista}/functions.py (100%) rename python/{datafusion => ballista}/tests/__init__.py (100%) create mode 100644 python/ballista/tests/conftest.py rename python/{datafusion => ballista}/tests/generic.py (100%) rename python/{datafusion => ballista}/tests/test_aggregation.py (95%) rename python/{datafusion => ballista}/tests/test_catalog.py (69%) rename python/{datafusion => ballista}/tests/test_context.py (77%) rename python/{datafusion => ballista}/tests/test_dataframe.py (88%) rename python/{datafusion => ballista}/tests/test_functions.py (98%) rename python/{datafusion => ballista}/tests/test_imports.py (70%) create mode 100644 python/ballista/tests/test_indexing.py rename python/{datafusion => ballista}/tests/test_sql.py (98%) rename python/{datafusion => ballista}/tests/test_udaf.py (95%) create mode 100644 python/dev/create_license.py create mode 100644 python/requirements-310.txt create mode 100644 python/src/ballista_context.rs diff --git a/python/CHANGELOG.md b/python/CHANGELOG.md index a07cb003c..a27ad0adb 100644 --- a/python/CHANGELOG.md +++ b/python/CHANGELOG.md @@ -19,111 +19,68 @@ # Changelog -## [python-0.4.0](https://github.com/apache/arrow-datafusion/tree/python-0.4.0) (2021-11-13) +## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) -[Full
Changelog](https://github.com/apache/arrow-datafusion/compare/python-0.3.0...python-0.4.0) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) -**Breaking changes:** +**Merged pull requests:** -- Add function volatility to Signature [\#1071](https://github.com/apache/arrow-datafusion/pull/1071) [[sql](https://github.com/apache/arrow-datafusion/labels/sql)] ([pjmore](https://github.com/pjmore)) -- Make TableProvider.scan\(\) and PhysicalPlanner::create\_physical\_plan\(\) async [\#1013](https://github.com/apache/arrow-datafusion/pull/1013) ([rdettai](https://github.com/rdettai)) -- Reorganize table providers by table format [\#1010](https://github.com/apache/arrow-datafusion/pull/1010) ([rdettai](https://github.com/rdettai)) +- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) +- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) +- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) +- update readme and changelog [\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) +- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) +- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) +- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) +- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) +- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) +- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) +- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) +- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) +- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) +- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) +- update test workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) +- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) +- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) 
([Jimexist](https://github.com/Jimexist)) +- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) +- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) +- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) +- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) +- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) +- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) +- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) -**Implemented enhancements:** +## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) -- Build abi3 wheels for python binding [\#921](https://github.com/apache/arrow-datafusion/issues/921) -- Release documentation for python binding [\#837](https://github.com/apache/arrow-datafusion/issues/837) -- use arrow 6.1.0 [\#1255](https://github.com/apache/arrow-datafusion/pull/1255) ([Jimexist](https://github.com/Jimexist)) -- python `lit` function to support bool and byte vec [\#1152](https://github.com/apache/arrow-datafusion/pull/1152) ([Jimexist](https://github.com/Jimexist)) -- add python binding for `approx_distinct` aggregate function [\#1134](https://github.com/apache/arrow-datafusion/pull/1134) ([Jimexist](https://github.com/Jimexist)) -- refactor datafusion python `lit` function to allow different types [\#1130](https://github.com/apache/arrow-datafusion/pull/1130) ([Jimexist](https://github.com/Jimexist)) -- \[python\] add digest python function [\#1127](https://github.com/apache/arrow-datafusion/pull/1127) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add `blake3` algorithm to `digest` function [\#1086](https://github.com/apache/arrow-datafusion/pull/1086) ([Jimexist](https://github.com/Jimexist)) -- \[crypto\] add blake2b and blake2s functions [\#1081](https://github.com/apache/arrow-datafusion/pull/1081) ([Jimexist](https://github.com/Jimexist)) -- fix: fix joins on Float32/Float64 columns bug [\#1054](https://github.com/apache/arrow-datafusion/pull/1054) ([francis-du](https://github.com/francis-du)) -- Update DataFusion to arrow 6.0 [\#984](https://github.com/apache/arrow-datafusion/pull/984) ([alamb](https://github.com/alamb)) -- \[Python\] Add support to perform sql query on in-memory datasource. 
[\#981](https://github.com/apache/arrow-datafusion/pull/981) ([mmuru](https://github.com/mmuru)) -- \[Python\] - Support show function for DataFrame api of python library [\#942](https://github.com/apache/arrow-datafusion/pull/942) ([francis-du](https://github.com/francis-du)) -- Rework the python bindings using conversion traits from arrow-rs [\#873](https://github.com/apache/arrow-datafusion/pull/873) ([kszucs](https://github.com/kszucs)) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) -**Fixed bugs:** +## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) -- Error in `python test` check / maturn python build: `function or associated item not found in `proc_macro::Literal` [\#961](https://github.com/apache/arrow-datafusion/issues/961) -- Use UUID to create unique table names in python binding [\#1111](https://github.com/apache/arrow-datafusion/pull/1111) ([hippowdon](https://github.com/hippowdon)) -- python: fix generated table name in dataframe creation [\#1078](https://github.com/apache/arrow-datafusion/pull/1078) ([houqp](https://github.com/houqp)) -- fix: joins on Timestamp columns [\#1055](https://github.com/apache/arrow-datafusion/pull/1055) ([francis-du](https://github.com/francis-du)) -- register datafusion.functions as a python package [\#995](https://github.com/apache/arrow-datafusion/pull/995) ([houqp](https://github.com/houqp)) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) -**Documentation updates:** +## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) -- python: update docs to use new APIs [\#1287](https://github.com/apache/arrow-datafusion/pull/1287) ([houqp](https://github.com/houqp)) -- Fix typo on Python functions [\#1207](https://github.com/apache/arrow-datafusion/pull/1207) ([j-a-m-l](https://github.com/j-a-m-l)) -- fix deadlink in python/readme [\#1002](https://github.com/apache/arrow-datafusion/pull/1002) ([waynexia](https://github.com/waynexia)) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) -**Performance improvements:** +## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) -- optimize build profile for datafusion python binding, cli and ballista [\#1137](https://github.com/apache/arrow-datafusion/pull/1137) ([houqp](https://github.com/houqp)) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) **Closed issues:** -- InList expr with NULL literals do not work [\#1190](https://github.com/apache/arrow-datafusion/issues/1190) -- update the homepage README to include values, `approx_distinct`, etc. [\#1171](https://github.com/apache/arrow-datafusion/issues/1171) -- \[Python\]: Inconsistencies with Python package name [\#1011](https://github.com/apache/arrow-datafusion/issues/1011) -- Wanting to contribute to project where to start? 
[\#983](https://github.com/apache/arrow-datafusion/issues/983) -- delete redundant code [\#973](https://github.com/apache/arrow-datafusion/issues/973) -- \[Python\]: register custom datasource [\#906](https://github.com/apache/arrow-datafusion/issues/906) -- How to build DataFusion python wheel [\#853](https://github.com/apache/arrow-datafusion/issues/853) -- Produce a design for a metrics framework [\#21](https://github.com/apache/arrow-datafusion/issues/21) - - -For older versions, see [apache/arrow/CHANGELOG.md](https://github.com/apache/arrow/blob/master/CHANGELOG.md) - -## [python-0.3.0](https://github.com/apache/arrow-datafusion/tree/python-0.3.0) (2021-08-10) - -[Full Changelog](https://github.com/apache/arrow-datafusion/compare/4.0.0...python-0.3.0) - -**Implemented enhancements:** +- Add support for Ballista [\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) +- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) -- add more math functions and unit tests to `python` crate [\#748](https://github.com/apache/arrow-datafusion/pull/748) ([Jimexist](https://github.com/Jimexist)) -- Expose ExecutionContext.register\_csv to the python bindings [\#524](https://github.com/apache/arrow-datafusion/pull/524) ([kszucs](https://github.com/kszucs)) -- Implement missing join types for Python dataframe [\#503](https://github.com/apache/arrow-datafusion/pull/503) ([Dandandan](https://github.com/Dandandan)) -- Add missing functions to python [\#388](https://github.com/apache/arrow-datafusion/pull/388) ([jgoday](https://github.com/jgoday)) +## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) -**Fixed bugs:** - -- fix maturin version in pyproject.toml [\#756](https://github.com/apache/arrow-datafusion/pull/756) ([Jimexist](https://github.com/Jimexist)) -- fix pyarrow type id mapping in `python` crate [\#742](https://github.com/apache/arrow-datafusion/pull/742) ([Jimexist](https://github.com/Jimexist)) +[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) **Closed issues:** -- Confirm git tagging strategy for releases [\#770](https://github.com/apache/arrow-datafusion/issues/770) -- arrow::util::pretty::pretty\_format\_batches missing [\#769](https://github.com/apache/arrow-datafusion/issues/769) -- move the `assert_batches_eq!` macros to a non part of datafusion [\#745](https://github.com/apache/arrow-datafusion/issues/745) -- fix an issue where aliases are not respected in generating downstream schemas in window expr [\#592](https://github.com/apache/arrow-datafusion/issues/592) -- make the planner to print more succinct and useful information in window function explain clause [\#526](https://github.com/apache/arrow-datafusion/issues/526) -- move window frame module to be in `logical_plan` [\#517](https://github.com/apache/arrow-datafusion/issues/517) -- use a more rust idiomatic way of handling nth\_value [\#448](https://github.com/apache/arrow-datafusion/issues/448) -- create a test with more than one partition for window functions [\#435](https://github.com/apache/arrow-datafusion/issues/435) -- Implement hash-partitioned hash aggregate 
[\#27](https://github.com/apache/arrow-datafusion/issues/27) -- Consider using GitHub pages for DataFusion/Ballista documentation [\#18](https://github.com/apache/arrow-datafusion/issues/18) -- Update "repository" in Cargo.toml [\#16](https://github.com/apache/arrow-datafusion/issues/16) - -**Merged pull requests:** - -- fix python binding for `concat`, `concat_ws`, and `random` [\#768](https://github.com/apache/arrow-datafusion/pull/768) ([Jimexist](https://github.com/Jimexist)) -- fix 226, make `concat`, `concat_ws`, and `random` work with `Python` crate [\#761](https://github.com/apache/arrow-datafusion/pull/761) ([Jimexist](https://github.com/Jimexist)) -- fix python crate with the changes to logical plan builder [\#650](https://github.com/apache/arrow-datafusion/pull/650) ([Jimexist](https://github.com/Jimexist)) -- use nightly nightly-2021-05-10 [\#536](https://github.com/apache/arrow-datafusion/pull/536) ([Jimexist](https://github.com/Jimexist)) -- Define the unittests using pytest [\#493](https://github.com/apache/arrow-datafusion/pull/493) ([kszucs](https://github.com/kszucs)) -- use requirements.txt to formalize python deps [\#484](https://github.com/apache/arrow-datafusion/pull/484) ([Jimexist](https://github.com/Jimexist)) -- update cargo.toml in python crate and fix unit test due to hash joins [\#483](https://github.com/apache/arrow-datafusion/pull/483) ([Jimexist](https://github.com/Jimexist)) -- simplify python function definitions [\#477](https://github.com/apache/arrow-datafusion/pull/477) ([Jimexist](https://github.com/Jimexist)) -- Expose DataFrame::sort in the python bindings [\#469](https://github.com/apache/arrow-datafusion/pull/469) ([kszucs](https://github.com/kszucs)) -- Revert "Revert "Add datafusion-python \(\#69\)" \(\#257\)" [\#270](https://github.com/apache/arrow-datafusion/pull/270) ([andygrove](https://github.com/andygrove)) -- Revert "Add datafusion-python \(\#69\)" [\#257](https://github.com/apache/arrow-datafusion/pull/257) ([andygrove](https://github.com/andygrove)) -- update arrow-rs deps to latest master [\#216](https://github.com/apache/arrow-datafusion/pull/216) ([alamb](https://github.com/alamb)) -- Add datafusion-python [\#69](https://github.com/apache/arrow-datafusion/pull/69) ([jorgecarleitao](https://github.com/jorgecarleitao)) - - +- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) +- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) +- Why is pandas a requirement? 
[\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) +- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) +- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) -\* *This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)* +\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/python/Cargo.lock b/python/Cargo.lock index 74c6e9000..e71932b3b 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -14,7 +14,7 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.6", "once_cell", "version_check", ] @@ -51,21 +51,22 @@ checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" [[package]] name = "arrayvec" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4dc07131ffa69b8072d35f5007352af944213cde02545e2103680baed38fcd" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "arrow" -version = "6.5.0" +version = "13.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216c6846a292bdd93c2b93c1baab58c32ff50e2ab5e8d50db333ab518535dd8b" +checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" dependencies = [ "bitflags", "chrono", "comfy-table", "csv", "flatbuffers", + "half", "hex", "indexmap", "lazy_static", @@ -73,7 +74,7 @@ dependencies = [ "multiversion", "num", "pyo3", - "rand 0.8.4", + "rand 0.8.5", "regex", "serde", "serde_derive", @@ -82,9 +83,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.51" +version = "0.1.53" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44318e776df68115a881de9a8fd1b9e53368d7a4a5ce4cc48517da3393233a5e" +checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" dependencies = [ "proc-macro2", "quote", @@ -93,9 +94,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "base64" @@ -111,20 +112,18 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "blake2" -version = "0.9.2" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e37d16930f5459780f5621038b6382b9bb37c19016f39fb6b5808d831f174" +checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" dependencies = [ - "crypto-mac", "digest", - "opaque-debug", ] [[package]] name = "blake3" -version = "1.1.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2607a74355ce2e252d0c483b2d8a348e1bba36036e786ccc2dcd777213c86ffd" +checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" dependencies = [ "arrayref", "arrayvec", @@ -136,18 +135,18 @@ 
dependencies = [ [[package]] name = "block-buffer" -version = "0.9.0" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" dependencies = [ "generic-array", ] [[package]] name = "brotli" -version = "3.3.2" +version = "3.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71cb90ade945043d3d53597b2fc359bb063db8ade2bcffe7997351d0756e9d50" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" dependencies = [ "alloc-no-stdlib", "alloc-stdlib", @@ -184,9 +183,9 @@ checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" -version = "1.0.71" +version = "1.0.73" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" dependencies = [ "jobserver", ] @@ -206,7 +205,6 @@ dependencies = [ "libc", "num-integer", "num-traits", - "time", "winapi", ] @@ -229,30 +227,30 @@ checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" [[package]] name = "cpufeatures" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95059428f66df56b63431fdb4e1947ed2190586af5c5a8a8b71122bdf5a7f469" +checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" dependencies = [ "libc", ] [[package]] name = "crc32fast" -version = "1.2.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" dependencies = [ "cfg-if", ] [[package]] -name = "crypto-mac" -version = "0.8.0" +name = "crypto-common" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab" +checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" dependencies = [ "generic-array", - "subtle", + "typenum", ] [[package]] @@ -263,7 +261,7 @@ checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" dependencies = [ "bstr", "csv-core", - "itoa", + "itoa 0.4.8", "ryu", "serde", ] @@ -279,61 +277,155 @@ dependencies = [ [[package]] name = "datafusion" -version = "6.0.0" -source = "git+https://github.com/apache/arrow-datafusion?rev=7607ace992a5a42840bf546221a8635e70e10885#7607ace992a5a42840bf546221a8635e70e10885" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8174c458d0266ba442038160fad2c98f02924e6179d6d46175f600b69abb5bb7" dependencies = [ "ahash", "arrow", "async-trait", - "blake2", - "blake3", "chrono", + "datafusion-common", + "datafusion-data-access", + "datafusion-expr", + "datafusion-physical-expr", + "datafusion-row", "futures", - "hashbrown", + "hashbrown 0.12.1", "lazy_static", "log", - "md-5", "num_cpus", - "ordered-float 2.8.0", + "ordered-float 3.0.0", + "parking_lot", "parquet", - "paste 1.0.5", + "paste", "pin-project-lite", "pyo3", - "rand 0.8.4", - "regex", - "sha2", + "rand 0.8.5", "smallvec", "sqlparser", + "tempfile", "tokio", "tokio-stream", + "uuid 1.0.0", +] + 
+[[package]] +name = "datafusion-common" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c687e12a58d1499b19ee899fa467d122c8cc9223570af6f3eb3c4d9d9be929b" +dependencies = [ + "arrow", + "ordered-float 3.0.0", + "parquet", + "pyo3", + "sqlparser", +] + +[[package]] +name = "datafusion-data-access" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cca9e1200ddd362d97f22d185e98995ebd3737d2b3d69a200a06a5099fa699a" +dependencies = [ + "async-trait", + "chrono", + "futures", + "glob", + "parking_lot", + "tempfile", + "tokio", +] + +[[package]] +name = "datafusion-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d20909d70931b88605b6f121c0ed820cec1d5b802cb51b7b5759f0421be3add8" +dependencies = [ + "ahash", + "arrow", + "datafusion-common", + "sqlparser", +] + +[[package]] +name = "datafusion-physical-expr" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad9083cb20b57216430d5b8e52341782d9520ce77e65c60803e330fd09bdfa6e" +dependencies = [ + "ahash", + "arrow", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-row", + "hashbrown 0.12.1", + "lazy_static", + "md-5", + "ordered-float 3.0.0", + "paste", + "rand 0.8.5", + "regex", + "sha2", "unicode-segmentation", ] [[package]] name = "datafusion-python" -version = "0.4.0" +version = "0.6.0" dependencies = [ "datafusion", + "datafusion-common", + "datafusion-expr", + "mimalloc", "pyo3", "rand 0.7.3", "tokio", - "uuid", + "uuid 0.8.2", +] + +[[package]] +name = "datafusion-row" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f649a2021eefef44e0bef6782d053148b2fef74db7592fecfe87e042d52947a" +dependencies = [ + "arrow", + "datafusion-common", + "paste", + "rand 0.8.5", ] [[package]] name = "digest" -version = "0.9.0" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" +checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" dependencies = [ - "generic-array", + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", ] [[package]] name = "flatbuffers" -version = "2.0.0" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef4c5738bcd7fad10315029c50026f83c9da5e4a21f8ed66826f43e0e2bde5f6" +checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" dependencies = [ "bitflags", "smallvec", @@ -342,9 +434,9 @@ dependencies = [ [[package]] name = "flate2" -version = "1.0.22" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" dependencies = [ "cfg-if", "crc32fast", @@ -354,9 +446,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a12aa0eb539080d55c3f2d45a67c3b58b6b0773c1a3ca2dfec66d58c97fd66ca" +checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" dependencies = [ "futures-channel", "futures-core", @@ -369,9 +461,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5da6ba8c3bb3c165d3c7319fc1cc8304facf1fb8db99c5de877183c08a273888" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" dependencies = [ "futures-core", "futures-sink", @@ -379,15 +471,15 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d1c26957f23603395cd326b0ffe64124b818f4449552f960d815cfba83a53d" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" [[package]] name = "futures-executor" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45025be030969d763025784f7f355043dc6bc74093e4ecc5000ca4dc50d8745c" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" dependencies = [ "futures-core", "futures-task", @@ -396,18 +488,16 @@ dependencies = [ [[package]] name = "futures-io" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "522de2a0fe3e380f1bc577ba0474108faf3f6b18321dbf60b3b9c39a75073377" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" [[package]] name = "futures-macro" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18e4a4b95cea4b4ccbcf1c5675ca7c4ee4e9e75eb79944d07defde18068f79bb" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" dependencies = [ - "autocfg", - "proc-macro-hack", "proc-macro2", "quote", "syn", @@ -415,23 +505,22 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36ea153c13024fe480590b3e3d4cad89a0cfacecc24577b68f86c6ced9c2bc11" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" [[package]] name = "futures-task" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3d00f4eddb73e498a54394f228cd55853bdf059259e8e7bc6e69d408892e99" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" [[package]] name = "futures-util" -version = "0.3.17" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36568465210a3a6ee45e1f165136d68671471a501e632e9a98d96872222b5481" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ - "autocfg", "futures-channel", "futures-core", "futures-io", @@ -441,16 +530,14 @@ dependencies = [ "memchr", "pin-project-lite", "pin-utils", - "proc-macro-hack", - "proc-macro-nested", "slab", ] [[package]] name = "generic-array" -version = "0.14.4" +version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" dependencies = [ "typenum", "version_check", @@ -469,20 
+556,38 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.3" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" dependencies = [ "cfg-if", "libc", "wasi 0.10.2+wasi-snapshot-preview1", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "half" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" + [[package]] name = "hashbrown" version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" + +[[package]] +name = "hashbrown" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" dependencies = [ "ahash", ] @@ -513,36 +618,19 @@ checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "indexmap" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" +checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" dependencies = [ "autocfg", - "hashbrown", + "hashbrown 0.11.2", ] [[package]] name = "indoc" -version = "0.3.6" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" -dependencies = [ - "indoc-impl", - "proc-macro-hack", -] - -[[package]] -name = "indoc-impl" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" -dependencies = [ - "proc-macro-hack", - "proc-macro2", - "quote", - "syn", - "unindent", -] +checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" [[package]] name = "instant" @@ -565,6 +653,12 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + [[package]] name = "jobserver" version = "0.1.24" @@ -582,9 +676,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "lexical-core" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a3926d8f156019890be4abe5fd3785e0cff1001e06f59c597641fd513a5a284" +checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" dependencies = [ "lexical-parse-float", "lexical-parse-integer", @@ -595,9 +689,9 @@ dependencies = [ [[package]] name = "lexical-parse-float" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b4d066d004fa762d9da995ed21aa8845bb9f6e4265f540d716fb4b315197bf0e" +checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" dependencies = [ "lexical-parse-integer", "lexical-util", @@ -606,9 +700,9 @@ dependencies = [ [[package]] name = "lexical-parse-integer" -version = "0.8.0" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c92badda8cc0fc4f3d3cc1c30aaefafb830510c8781ce4e8669881f3ed53ac" +checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" dependencies = [ "lexical-util", "static_assertions", @@ -616,18 +710,18 @@ dependencies = [ [[package]] name = "lexical-util" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ff669ccaae16ee33af90dc51125755efed17f1309626ba5c12052512b11e291" +checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" dependencies = [ "static_assertions", ] [[package]] name = "lexical-write-float" -version = "0.8.2" +version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b5186948c7b297abaaa51560f2581dae625e5ce7dfc2d8fdc56345adb6dc576" +checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" dependencies = [ "lexical-util", "lexical-write-integer", @@ -636,9 +730,9 @@ dependencies = [ [[package]] name = "lexical-write-integer" -version = "0.8.0" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ece956492e0e40fd95ef8658a34d53a3b8c2015762fdcaaff2167b28de1f56ef" +checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" dependencies = [ "lexical-util", "static_assertions", @@ -646,33 +740,43 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.105" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "869d572136620d55835903746bcb5cdc54cb2851fd0aeec53220b4bb65ef3013" +checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" +dependencies = [ + "cc", +] [[package]] name = "lock_api" -version = "0.4.5" +version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" dependencies = [ + "autocfg", "scopeguard", ] [[package]] name = "log" -version = "0.4.14" +version = "0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" dependencies = [ "cfg-if", ] [[package]] name = "lz4" -version = "1.23.2" +version = "1.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aac20ed6991e01bf6a2e68cc73df2b389707403662a8ba89f68511fb340f724c" +checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" dependencies = [ "libc", "lz4-sys", @@ -680,9 +784,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.2" +version = "1.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" dependencies = [ "cc", "libc", @@ -690,29 +794,35 @@ dependencies = [ [[package]] name = "md-5" -version = "0.9.1" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5a279bb9607f9f53c22d496eade00d138d1bdcccd07d74650387cf94942a15" +checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" dependencies = [ - "block-buffer", "digest", - "opaque-debug", ] [[package]] name = "memchr" -version = "2.4.1" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "mimalloc" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" +checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" +dependencies = [ + "libmimalloc-sys", +] [[package]] name = "miniz_oxide" -version = "0.4.4" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" dependencies = [ "adler", - "autocfg", ] [[package]] @@ -751,9 +861,9 @@ dependencies = [ [[package]] name = "num-bigint" -version = "0.4.2" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74e768dff5fb39a41b3bcd30bb25cf989706c90d028d1ad71971987aa309d535" +checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" dependencies = [ "autocfg", "num-integer", @@ -762,18 +872,18 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" +checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" dependencies = [ "num-traits", ] [[package]] name = "num-integer" -version = "0.1.44" +version = "0.1.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" dependencies = [ "autocfg", "num-traits", @@ -781,9 +891,9 @@ dependencies = [ [[package]] name = "num-iter" -version = "0.1.42" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2021c8337a54d21aca0d59a92577a029af9431cb59b909b03252b9c164fad59" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" dependencies = [ "autocfg", "num-integer", @@ -804,18 +914,18 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" -version = "1.13.0" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" 
+checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" dependencies = [ "hermit-abi", "libc", @@ -823,15 +933,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.8.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" - -[[package]] -name = "opaque-debug" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "624a8340c38c1b80fd549087862da4ba43e08858af025b236e509b6649fc13d5" +checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" [[package]] name = "ordered-float" @@ -844,43 +948,41 @@ dependencies = [ [[package]] name = "ordered-float" -version = "2.8.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97c9d06878b3a851e8026ef94bf7fef9ba93062cd412601da4d9cf369b1cc62d" +checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" dependencies = [ "num-traits", ] [[package]] name = "parking_lot" -version = "0.11.2" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" dependencies = [ - "instant", "lock_api", "parking_lot_core", ] [[package]] name = "parking_lot_core" -version = "0.8.5" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" dependencies = [ "cfg-if", - "instant", "libc", "redox_syscall", "smallvec", - "winapi", + "windows-sys", ] [[package]] name = "parquet" -version = "6.5.0" +version = "13.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "788d9953f4cfbe9db1beff7bebd54299d105e34680d78b82b1ddc85d432cac9d" +checksum = "6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4" dependencies = [ "arrow", "base64", @@ -889,9 +991,10 @@ dependencies = [ "chrono", "flate2", "lz4", + "num", "num-bigint", "parquet-format", - "rand 0.8.4", + "rand 0.8.5", "snap", "thrift", "zstd", @@ -899,43 +1002,24 @@ dependencies = [ [[package]] name = "parquet-format" -version = "2.6.1" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5bc6b23543b5dedc8f6cce50758a35e5582e148e0cfa26bd0cacd569cda5b71" +checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" dependencies = [ "thrift", ] [[package]] name = "paste" -version = "0.1.18" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" -dependencies = [ - "paste-impl", - "proc-macro-hack", -] - -[[package]] -name = "paste" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acbf547ad0c65e31259204bd90935776d1c693cec2f4ff7abb7a1bbbd40dfe58" - -[[package]] -name = "paste-impl" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" -dependencies = [ - "proc-macro-hack", -] +checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" [[package]] 
name = "pin-project-lite" -version = "0.2.7" +version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" [[package]] name = "pin-utils" @@ -945,62 +1029,62 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "ppv-lite86" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" - -[[package]] -name = "proc-macro-hack" -version = "0.5.19" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" - -[[package]] -name = "proc-macro-nested" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc881b2c22681370c6a780e47af9840ef841837bc98118431d4e1868bd0c1086" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "proc-macro2" -version = "1.0.30" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edc3358ebc67bc8b7fa0c007f945b0b18226f78437d61bec735a9eb96b61ee70" +checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" dependencies = [ - "unicode-xid", + "unicode-ident", ] [[package]] name = "pyo3" -version = "0.14.5" +version = "0.16.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35100f9347670a566a67aa623369293703322bb9db77d99d7df7313b575ae0c8" +checksum = "1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc" dependencies = [ "cfg-if", "indoc", "libc", "parking_lot", - "paste 0.1.18", "pyo3-build-config", + "pyo3-ffi", "pyo3-macros", "unindent", ] [[package]] name = "pyo3-build-config" -version = "0.14.5" +version = "0.16.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d12961738cacbd7f91b7c43bc25cfeeaa2698ad07a04b3be0aa88b950865738f" +checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8" dependencies = [ "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.16.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b" +dependencies = [ + "libc", + "pyo3-build-config", ] [[package]] name = "pyo3-macros" -version = "0.14.5" +version = "0.16.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc0bc5215d704824dfddddc03f93cb572e1155c68b6761c37005e1c288808ea8" +checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7" dependencies = [ + "proc-macro2", "pyo3-macros-backend", "quote", "syn", @@ -1008,21 +1092,20 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.14.5" +version = "0.16.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71623fc593224afaab918aa3afcaf86ed2f43d34f6afde7f3922608f253240df" +checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf" dependencies = [ "proc-macro2", - "pyo3-build-config", "quote", "syn", ] [[package]] name = "quote" -version = "1.0.10" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" dependencies = [ "proc-macro2", ] @@ -1037,19 +1120,18 @@ dependencies = [ "libc", "rand_chacha 0.2.2", "rand_core 0.5.1", - "rand_hc 0.2.0", + "rand_hc", ] [[package]] name = "rand" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha 0.3.1", "rand_core 0.6.3", - "rand_hc 0.3.1", ] [[package]] @@ -1087,7 +1169,7 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.6", ] [[package]] @@ -1099,29 +1181,20 @@ dependencies = [ "rand_core 0.5.1", ] -[[package]] -name = "rand_hc" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" -dependencies = [ - "rand_core 0.6.3", -] - [[package]] name = "redox_syscall" -version = "0.2.10" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" dependencies = [ "aho-corasick", "memchr", @@ -1140,6 +1213,15 @@ version = "0.6.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + [[package]] name = "rustversion" version = "1.0.6" @@ -1148,9 +1230,9 @@ checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" [[package]] name = "ryu" -version = "1.0.5" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" [[package]] name = "scopeguard" @@ -1160,15 +1242,15 @@ checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "serde" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" [[package]] name = "serde_derive" -version = "1.0.130" +version = "1.0.137" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +checksum = 
"1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" dependencies = [ "proc-macro2", "quote", @@ -1177,40 +1259,38 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.68" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" dependencies = [ "indexmap", - "itoa", + "itoa 1.0.2", "ryu", "serde", ] [[package]] name = "sha2" -version = "0.9.8" +version = "0.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b69f9a4c9740d74c5baa3fd2e547f9525fa8088a8a958e0ca2409a514e33f5fa" +checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" dependencies = [ - "block-buffer", "cfg-if", "cpufeatures", "digest", - "opaque-debug", ] [[package]] name = "slab" -version = "0.4.5" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9def91fd1e018fe007022791f865d0ccc9b3a0d5001e01aabb8b40e46000afb5" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" [[package]] name = "smallvec" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "snap" @@ -1220,9 +1300,9 @@ checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" [[package]] name = "sqlparser" -version = "0.13.0" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9907f54bd0f7b6ce72c2be1e570a614819ee08e3deb66d90480df341d8a12a8" +checksum = "ddc2739f3a9bfc68e2f7b7695589f6cb0181c88af73ceaee0c84215cd2a2ae28" dependencies = [ "log", ] @@ -1260,29 +1340,49 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" [[package]] name = "syn" -version = "1.0.80" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d010a1623fbd906d51d650a9916aaefc05ffa0e4053ff7fe601167f3e715d194" +checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7fa7e55043acb85fca6b3c01485a2eeb6b69c5d21002e273c79e465f43b7ac1" + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", ] [[package]] name = "thiserror" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.30" +version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +checksum = 
"0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" dependencies = [ "proc-macro2", "quote", @@ -1311,33 +1411,24 @@ dependencies = [ "threadpool", ] -[[package]] -name = "time" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "tokio" -version = "1.12.0" +version = "1.18.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2c2416fdedca8443ae44b4527de1ea633af61d8f7169ffa6e72c5b53d24efcc" +checksum = "4903bf0427cf68dddd5aa6a93220756f8be0c34fcfa9f5e6191e103e15a31395" dependencies = [ - "autocfg", "num_cpus", + "once_cell", + "parking_lot", "pin-project-lite", "tokio-macros", ] [[package]] name = "tokio-macros" -version = "1.5.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2dd85aeaba7b68df939bd357c6afb36c87951be9e80bf9c859f2fc3e9fca0fd" +checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" dependencies = [ "proc-macro2", "quote", @@ -1346,9 +1437,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2f3f698253f03119ac0102beaa64f67a67e08074d03a22d18784104543727f" +checksum = "50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" dependencies = [ "futures-core", "pin-project-lite", @@ -1357,15 +1448,21 @@ dependencies = [ [[package]] name = "typenum" -version = "1.14.0" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + +[[package]] +name = "unicode-ident" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" +checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" [[package]] name = "unicode-segmentation" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" +checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" [[package]] name = "unicode-width" @@ -1373,17 +1470,11 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" -[[package]] -name = "unicode-xid" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" - [[package]] name = "unindent" -version = "0.1.7" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" +checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" [[package]] name = "uuid" @@ -1391,14 +1482,23 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" dependencies = [ - "getrandom 0.2.3", + "getrandom 0.2.6", +] + +[[package]] +name = "uuid" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0" +dependencies = [ + "getrandom 0.2.6", ] [[package]] name = "version_check" -version = "0.9.3" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "wasi" @@ -1434,20 +1534,63 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + [[package]] name = "zstd" -version = "0.9.0+zstd.1.5.0" +version = "0.11.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" +checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" dependencies = [ "zstd-safe", ] [[package]] name = "zstd-safe" -version = "4.1.1+zstd.1.5.0" +version = "5.0.2+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" +checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" dependencies = [ "libc", "zstd-sys", @@ -1455,9 +1598,9 @@ dependencies = [ [[package]] name = "zstd-sys" -version = "1.6.1+zstd.1.5.0" +version = "2.0.1+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" +checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" dependencies = [ "cc", "libc", diff --git a/python/Cargo.toml b/python/Cargo.toml index a71ce3c49..0c3a42c3d 100644 --- a/python/Cargo.toml +++ b/python/Cargo.toml @@ -16,8 +16,8 @@ # under the License. 
[package] -name = "datafusion-python" -version = "0.4.0" +name = "ballista-python" +version = "0.6.0" homepage = "https://github.com/apache/arrow" repository = "https://github.com/apache/arrow" authors = ["Apache Arrow "] @@ -28,18 +28,23 @@ edition = "2021" rust-version = "1.57" [package.metadata.maturin] -name = "datafusion._internal" +name = "ballista._internal" [dependencies] -datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "7607ace992a5a42840bf546221a8635e70e10885", features = ["pyarrow"] } -pyo3 = { version = "0.14", features = ["extension-module", "abi3", "abi3-py36"] } +ballista = { git = "https://github.com/apache/arrow-ballista", rev = "9e78b8ecd55cc3e9d46387c87098c4e6625294eb" } +datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features = ["pyarrow"] } +datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } +datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } + +mimalloc = { version = "*", default-features = false } +pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } rand = "0.7" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } uuid = { version = "0.8", features = ["v4"] } [lib] -crate-type = ["cdylib"] -name = "_internal" +crate-type = ["cdylib", "rlib"] +name = "ballista_python" [profile.release] codegen-units = 1 diff --git a/python/README.md b/python/README.md index 5979803dc..a9d7b873e 100644 --- a/python/README.md +++ b/python/README.md @@ -17,14 +17,14 @@ under the License. --> -## DataFusion in Python +# Ballista in Python + +[![Python test](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml/badge.svg)](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml) This is a Python library that binds to [Apache Arrow](https://arrow.apache.org/) in-memory query engine [DataFusion](https://github.com/apache/arrow-datafusion). Like pyspark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python. -It also allows you to use UDFs and UDAFs for complex operations. - The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations. Its query engine, DataFusion, is written in [Rust](https://www.rust-lang.org/), which makes strong assumptions about thread safety and lack of memory leaks. 
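The README hunk above keeps the zero-copy claim while the later hunks drop the UDF/UDAF walkthrough. As a rough sketch of what that claim means in practice (not part of this patch; it runs against a plain `datafusion.SessionContext` rather than a Ballista cluster, and the function name `is_positive` is illustrative only), a UDF registered through the bindings receives whole `pyarrow` arrays instead of row-by-row Python objects:

```python
# Illustrative sketch only, not part of the patch. It relies on the same
# datafusion APIs the new ballista/__init__.py and tests in this patch use
# (SessionContext, column, udf, create_dataframe).
import pyarrow as pa
import pyarrow.compute as pc
from datafusion import SessionContext, column, udf


def is_positive(arr: pa.Array) -> pa.Array:
    # Operates on the Arrow buffers as a whole; no per-row conversion to Python.
    return pc.greater(arr, 0)


ctx = SessionContext()
batch = pa.RecordBatch.from_arrays([pa.array([1, -2, 3])], names=["a"])
df = ctx.create_dataframe([[batch]])

is_positive_udf = udf(is_positive, [pa.int64()], pa.bool_(), "immutable")
print(df.select(is_positive_udf(column("a"))).collect())
```

Since the `udf` helper in `ballista/__init__.py` (later in this patch) simply forwards to `datafusion.ScalarUDF`, the same pattern applies when the expression is built through the `ballista` package.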
@@ -36,91 +36,27 @@ Technically, zero-copy is achieved via the [c data interface](https://arrow.apac Simple usage: ```python -import datafusion -import pyarrow - -# an alias -f = datafusion.functions - -# create a context -ctx = datafusion.ExecutionContext() - -# create a RecordBatch and a new DataFrame from it -batch = pyarrow.RecordBatch.from_arrays( - [pyarrow.array([1, 2, 3]), pyarrow.array([4, 5, 6])], - names=["a", "b"], -) -df = ctx.create_dataframe([[batch]]) - -# create a new statement -df = df.select( - f.col("a") + f.col("b"), - f.col("a") - f.col("b"), -) - -# execute and collect the first (and only) batch -result = df.collect()[0] - -assert result.column(0) == pyarrow.array([5, 7, 9]) -assert result.column(1) == pyarrow.array([-3, -3, -3]) +import ballista +ctx = ballista.BallistaContext(host="host.docker.internal") +ctx.register_parquet("table", "/data") +df = ctx.sql("SELECT * FROM table LIMIT 10") +df.show() ``` -### UDFs - -```python -def is_null(array: pyarrow.Array) -> pyarrow.Array: - return array.is_null() - -udf = f.udf(is_null, [pyarrow.int64()], pyarrow.bool_()) +## How to install (from pip) -df = df.select(udf(f.col("a"))) +```bash +pip install ballista +# or +python -m pip install ballista ``` -### UDAF +You can verify the installation by running: ```python -import pyarrow -import pyarrow.compute - - -class Accumulator: - """ - Interface of a user-defined accumulation. - """ - def __init__(self): - self._sum = pyarrow.scalar(0.0) - - def to_scalars(self) -> [pyarrow.Scalar]: - return [self._sum] - - def update(self, values: pyarrow.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(values).as_py()) - - def merge(self, states: pyarrow.Array) -> None: - # not nice since pyarrow scalars can't be summed yet. This breaks on `None` - self._sum = pyarrow.scalar(self._sum.as_py() + pyarrow.compute.sum(states).as_py()) - - def evaluate(self) -> pyarrow.Scalar: - return self._sum - - -df = ... 
- -udaf = f.udaf(Accumulator, pyarrow.float64(), pyarrow.float64(), [pyarrow.float64()]) - -df = df.aggregate( - [], - [udaf(f.col("a"))] -) -``` - -## How to install (from pip) - -```bash -pip install datafusion -# or -python -m pip install datafusion +>>> import ballista +>>> ballista.__version__ +'0.6.0' ``` ## How to develop @@ -131,19 +67,15 @@ Bootstrap: ```bash # fetch this repo -git clone git@github.com:apache/arrow-datafusion.git -# change to python directory -cd arrow-datafusion/python +git clone git@github.com:datafusion-contrib/datafusion-python.git # prepare development environment (used to build wheel / install in development) python3 -m venv venv # activate the venv source venv/bin/activate # update pip itself if necessary python -m pip install -U pip -# if python -V gives python 3.7 -python -m pip install -r requirements-37.txt -# if python -V gives python 3.8/3.9/3.10 -python -m pip install -r requirements.txt +# install dependencies (for Python 3.8+) +python -m pip install -r requirements-310.txt ``` Whenever rust code changes (your changes or via `git pull`): @@ -161,11 +93,7 @@ To change test dependencies, change the `requirements.in` and run ```bash # install pip-tools (this can be done only once), also consider running in venv python -m pip install pip-tools - -# change requirements.in and then run -python -m piptools compile --generate-hashes -o requirements-37.txt -# or run this is you are on python 3.8/3.9/3.10 -python -m piptools compile --generate-hashes -o requirements.txt +python -m piptools compile --generate-hashes -o requirements-310.txt ``` To update dependencies, run with `-U` diff --git a/python/datafusion/__init__.py b/python/ballista/__init__.py similarity index 89% rename from python/datafusion/__init__.py rename to python/ballista/__init__.py index 0a25592f8..96108f9c6 100644 --- a/python/datafusion/__init__.py +++ b/python/ballista/__init__.py @@ -18,20 +18,29 @@ from abc import ABCMeta, abstractmethod from typing import List +try: + import importlib.metadata as importlib_metadata +except ImportError: + import importlib_metadata + + +import datafusion + import pyarrow as pa from ._internal import ( - AggregateUDF, DataFrame, - ExecutionContext, + BallistaContext, Expression, - ScalarUDF, ) +__version__ = importlib_metadata.version(__name__) + + __all__ = [ "DataFrame", - "ExecutionContext", + "BallistaContext", "Expression", "AggregateUDF", "ScalarUDF", @@ -82,7 +91,7 @@ def udf(func, input_types, return_type, volatility, name=None): raise TypeError("`func` argument must be callable") if name is None: name = func.__qualname__ - return ScalarUDF( + return datafusion.ScalarUDF( name=name, func=func, input_types=input_types, @@ -101,7 +110,7 @@ def udaf(accum, input_type, return_type, state_type, volatility, name=None): ) if name is None: name = accum.__qualname__ - return AggregateUDF( + return datafusion.AggregateUDF( name=name, accumulator=accum, input_type=input_type, diff --git a/python/datafusion/functions.py b/python/ballista/functions.py similarity index 100% rename from python/datafusion/functions.py rename to python/ballista/functions.py diff --git a/python/datafusion/tests/__init__.py b/python/ballista/tests/__init__.py similarity index 100% rename from python/datafusion/tests/__init__.py rename to python/ballista/tests/__init__.py diff --git a/python/ballista/tests/conftest.py b/python/ballista/tests/conftest.py new file mode 100644 index 000000000..93662a082 --- /dev/null +++ b/python/ballista/tests/conftest.py @@ -0,0 +1,33 
@@ +import pytest +from datafusion import SessionContext +import pyarrow as pa + + +@pytest.fixture +def ctx(): + return SessionContext() + + +@pytest.fixture +def database(ctx, tmp_path): + path = tmp_path / "test.csv" + + table = pa.Table.from_arrays( + [ + [1, 2, 3, 4], + ["a", "b", "c", "d"], + [1.1, 2.2, 3.3, 4.4], + ], + names=["int", "str", "float"], + ) + pa.csv.write_csv(table, path) + + ctx.register_csv("csv", path) + ctx.register_csv("csv1", str(path)) + ctx.register_csv( + "csv2", + path, + has_header=True, + delimiter=",", + schema_infer_max_records=10, + ) diff --git a/python/datafusion/tests/generic.py b/python/ballista/tests/generic.py similarity index 100% rename from python/datafusion/tests/generic.py rename to python/ballista/tests/generic.py diff --git a/python/datafusion/tests/test_aggregation.py b/python/ballista/tests/test_aggregation.py similarity index 95% rename from python/datafusion/tests/test_aggregation.py rename to python/ballista/tests/test_aggregation.py index d539c4458..bba1ed41d 100644 --- a/python/datafusion/tests/test_aggregation.py +++ b/python/ballista/tests/test_aggregation.py @@ -18,13 +18,13 @@ import pyarrow as pa import pytest -from datafusion import ExecutionContext, column +from datafusion import SessionContext, column from datafusion import functions as f @pytest.fixture def df(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( diff --git a/python/datafusion/tests/test_catalog.py b/python/ballista/tests/test_catalog.py similarity index 69% rename from python/datafusion/tests/test_catalog.py rename to python/ballista/tests/test_catalog.py index 2e64a810a..a9bdf72b3 100644 --- a/python/datafusion/tests/test_catalog.py +++ b/python/ballista/tests/test_catalog.py @@ -18,38 +18,6 @@ import pyarrow as pa import pytest -from datafusion import ExecutionContext - - -@pytest.fixture -def ctx(): - return ExecutionContext() - - -@pytest.fixture -def database(ctx, tmp_path): - path = tmp_path / "test.csv" - - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - pa.csv.write_csv(table, path) - - ctx.register_csv("csv", path) - ctx.register_csv("csv1", str(path)) - ctx.register_csv( - "csv2", - path, - has_header=True, - delimiter=",", - schema_infer_max_records=10, - ) - def test_basic(ctx, database): with pytest.raises(KeyError): diff --git a/python/datafusion/tests/test_context.py b/python/ballista/tests/test_context.py similarity index 77% rename from python/datafusion/tests/test_context.py rename to python/ballista/tests/test_context.py index 60beea4a0..4d4a38c9d 100644 --- a/python/datafusion/tests/test_context.py +++ b/python/ballista/tests/test_context.py @@ -16,14 +16,6 @@ # under the License. 
import pyarrow as pa -import pytest - -from datafusion import ExecutionContext - - -@pytest.fixture -def ctx(): - return ExecutionContext() def test_register_record_batches(ctx): @@ -61,3 +53,22 @@ def test_create_dataframe_registers_unique_table_name(ctx): # only hexadecimal numbers for c in tables[0][1:]: assert c in "0123456789abcdef" + + +def test_register_table(ctx, database): + default = ctx.catalog() + public = default.database("public") + assert public.names() == {"csv", "csv1", "csv2"} + table = public.table("csv") + + ctx.register_table("csv3", table) + assert public.names() == {"csv", "csv1", "csv2", "csv3"} + + +def test_deregister_table(ctx, database): + default = ctx.catalog() + public = default.database("public") + assert public.names() == {"csv", "csv1", "csv2"} + + ctx.deregister_table("csv") + assert public.names() == {"csv1", "csv2"} diff --git a/python/datafusion/tests/test_dataframe.py b/python/ballista/tests/test_dataframe.py similarity index 88% rename from python/datafusion/tests/test_dataframe.py rename to python/ballista/tests/test_dataframe.py index 9a97c25f2..43c260ae2 100644 --- a/python/datafusion/tests/test_dataframe.py +++ b/python/ballista/tests/test_dataframe.py @@ -19,12 +19,12 @@ import pytest from datafusion import functions as f -from datafusion import DataFrame, ExecutionContext, column, literal, udf +from datafusion import DataFrame, SessionContext, column, literal, udf @pytest.fixture def df(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( @@ -37,7 +37,7 @@ def df(): @pytest.fixture def struct_df(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( @@ -61,6 +61,16 @@ def test_select(df): assert result.column(1) == pa.array([-3, -3, -3]) +def test_select_columns(df): + df = df.select_columns("b", "a") + + # execute and collect the first (and only) batch + result = df.collect()[0] + + assert result.column(0) == pa.array([4, 5, 6]) + assert result.column(1) == pa.array([1, 2, 3]) + + def test_filter(df): df = df.select( column("a") + column("b"), @@ -109,7 +119,7 @@ def test_udf(df): def test_join(): - ctx = ExecutionContext() + ctx = SessionContext() batch = pa.RecordBatch.from_arrays( [pa.array([1, 2, 3]), pa.array([4, 5, 6])], @@ -149,7 +159,7 @@ def test_window_lead(df): def test_get_dataframe(tmp_path): - ctx = ExecutionContext() + ctx = SessionContext() path = tmp_path / "test.csv" table = pa.Table.from_arrays( @@ -179,3 +189,11 @@ def test_struct_select(struct_df): assert result.column(0) == pa.array([5, 7, 9]) assert result.column(1) == pa.array([-3, -3, -3]) + + +def test_explain(df): + df = df.select( + column("a") + column("b"), + column("a") - column("b"), + ) + df.explain() diff --git a/python/datafusion/tests/test_functions.py b/python/ballista/tests/test_functions.py similarity index 98% rename from python/datafusion/tests/test_functions.py rename to python/ballista/tests/test_functions.py index 84718eaf0..daa2f1967 100644 --- a/python/datafusion/tests/test_functions.py +++ b/python/ballista/tests/test_functions.py @@ -19,14 +19,14 @@ import pyarrow as pa import pytest -from datafusion import ExecutionContext, column +from datafusion import SessionContext, column from datafusion import functions as f from datafusion import literal @pytest.fixture def df(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it 
batch = pa.RecordBatch.from_arrays( [pa.array(["Hello", "World", "!"]), pa.array([4, 5, 6])], @@ -70,7 +70,7 @@ def test_lit_arith(df): def test_math_functions(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( [pa.array([0.1, -0.7, 0.55])], names=["value"] diff --git a/python/datafusion/tests/test_imports.py b/python/ballista/tests/test_imports.py similarity index 70% rename from python/datafusion/tests/test_imports.py rename to python/ballista/tests/test_imports.py index 423800248..9522bc31a 100644 --- a/python/datafusion/tests/test_imports.py +++ b/python/ballista/tests/test_imports.py @@ -17,45 +17,49 @@ import pytest -import datafusion -from datafusion import ( +import ballista +from ballista import ( AggregateUDF, DataFrame, - ExecutionContext, + SessionContext, Expression, ScalarUDF, functions, ) -def test_import_datafusion(): - assert datafusion.__name__ == "datafusion" +def test_import_ballista(): + assert ballista.__name__ == "ballista" -def test_class_module_is_datafusion(): +def test_ballista_python_version(): + assert ballista.__version__ is not None + + +def test_class_module_is_ballista(): for klass in [ - ExecutionContext, + SessionContext, Expression, DataFrame, ScalarUDF, AggregateUDF, ]: - assert klass.__module__ == "datafusion" + assert klass.__module__ == "ballista" def test_import_from_functions_submodule(): - from datafusion.functions import abs, sin # noqa + from ballista.functions import abs, sin # noqa assert functions.abs is abs assert functions.sin is sin - msg = "cannot import name 'foobar' from 'datafusion.functions'" + msg = "cannot import name 'foobar' from 'ballista.functions'" with pytest.raises(ImportError, match=msg): - from datafusion.functions import foobar # noqa + from ballista.functions import foobar # noqa def test_classes_are_inheritable(): - class MyExecContext(ExecutionContext): + class MyExecContext(SessionContext): pass class MyExpression(Expression): diff --git a/python/ballista/tests/test_indexing.py b/python/ballista/tests/test_indexing.py new file mode 100644 index 000000000..9e65c0efd --- /dev/null +++ b/python/ballista/tests/test_indexing.py @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +import pyarrow as pa +import pytest + +from datafusion import SessionContext + + +@pytest.fixture +def df(): + ctx = SessionContext() + + # create a RecordBatch and a new DataFrame from it + batch = pa.RecordBatch.from_arrays( + [pa.array([1, 2, 3]), pa.array([4, 4, 6])], + names=["a", "b"], + ) + return ctx.create_dataframe([[batch]]) + + +def test_indexing(df): + assert df["a"] is not None + assert df["a", "b"] is not None + assert df[("a", "b")] is not None + assert df[["a"]] is not None + + +def test_err(df): + with pytest.raises(Exception) as e_info: + df["c"] + + assert "Schema error: No field named 'c'" in e_info.value.args[0] + + with pytest.raises(Exception) as e_info: + df[1] + + assert ( + "DataFrame can only be indexed by string index or indices" + in e_info.value.args[0] + ) diff --git a/python/datafusion/tests/test_sql.py b/python/ballista/tests/test_sql.py similarity index 98% rename from python/datafusion/tests/test_sql.py rename to python/ballista/tests/test_sql.py index 23f20079f..cde5425a8 100644 --- a/python/datafusion/tests/test_sql.py +++ b/python/ballista/tests/test_sql.py @@ -19,16 +19,11 @@ import pyarrow as pa import pytest -from datafusion import ExecutionContext, udf +from datafusion import udf from . import generic as helpers -@pytest.fixture -def ctx(): - return ExecutionContext() - - def test_no_table(ctx): with pytest.raises(Exception, match="DataFusion error"): ctx.sql("SELECT a FROM b").collect() diff --git a/python/datafusion/tests/test_udaf.py b/python/ballista/tests/test_udaf.py similarity index 95% rename from python/datafusion/tests/test_udaf.py rename to python/ballista/tests/test_udaf.py index 2f286ba10..c2b29d199 100644 --- a/python/datafusion/tests/test_udaf.py +++ b/python/ballista/tests/test_udaf.py @@ -21,7 +21,7 @@ import pyarrow.compute as pc import pytest -from datafusion import Accumulator, ExecutionContext, column, udaf +from datafusion import Accumulator, SessionContext, column, udaf class Summarize(Accumulator): @@ -63,7 +63,7 @@ def state(self) -> List[pa.Scalar]: @pytest.fixture def df(): - ctx = ExecutionContext() + ctx = SessionContext() # create a RecordBatch and a new DataFrame from it batch = pa.RecordBatch.from_arrays( @@ -73,6 +73,7 @@ def df(): return ctx.create_dataframe([[batch]]) +@pytest.mark.skip(reason="df.collect() will hang, need more investigations") def test_errors(df): with pytest.raises(TypeError): udaf( diff --git a/python/dev/create_license.py b/python/dev/create_license.py new file mode 100644 index 000000000..2a67cb8fd --- /dev/null +++ b/python/dev/create_license.py @@ -0,0 +1,252 @@ +#!/usr/bin/python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# This file is a mirror of https://github.com/apache/arrow-datafusion/blob/master/dev/create_license.py + +import json +import subprocess + +subprocess.check_output(["cargo", "install", "cargo-license"]) +data = subprocess.check_output( + [ + "cargo", + "license", + "--avoid-build-deps", + "--avoid-dev-deps", + "--do-not-bundle", + "--json", + ] +) +data = json.loads(data) + +result = """ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. 
We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +""" +result += "\n------------------\n\n" +result += "This software is built and contains the following software:\n\n" +result += "(automatically generated via [cargo-license](https://crates.io/crates/cargo-license))\n\n" +for item in data: + license = item["license"] + name = item["name"] + version = item["version"] + repository = item["repository"] + result += "------------------\n\n" + result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n" + +with open("LICENSE.txt", "w") as f: + f.write(result) diff --git a/python/pyproject.toml b/python/pyproject.toml index c6ee36349..2d9d30e78 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -16,16 +16,16 @@ # under the License. [build-system] -requires = ["maturin>=0.11,<0.12"] +requires = ["maturin>=0.11,<0.13"] build-backend = "maturin" [project] -name = "datafusion" +name = "ballista" description = "Build and run queries against data" readme = "README.md" license = {file = "LICENSE.txt"} requires-python = ">=3.6" -keywords = ["datafusion", "dataframe", "rust", "query-engine"] +keywords = ["ballista", "datafusion", "dataframe", "rust", "query-engine"] classifier = [ "Development Status :: 2 - Pre-Alpha", "Intended Audience :: Developers", @@ -35,7 +35,6 @@ classifier = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", @@ -44,12 +43,16 @@ classifier = [ "Programming Language :: Rust", ] dependencies = [ + "datafusion>=0.5.2", "pyarrow>=1", ] [project.urls] -documentation = "https://arrow.apache.org/datafusion/python" -repository = "https://github.com/apache/arrow-datafusion" +documentation = "https://arrow.apache.org/ballista/python" +repository = "https://github.com/apache/arrow-ballista" [tool.isort] profile = "black" + +[tool.maturin] +sdist-include = ["Cargo.lock"] diff --git a/python/requirements-310.txt b/python/requirements-310.txt new file mode 100644 index 000000000..30a2291c4 --- /dev/null +++ b/python/requirements-310.txt @@ -0,0 +1,213 @@ +# +# This file is autogenerated by pip-compile with python 3.10 +# To update, run: +# +# pip-compile --generate-hashes +# +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd + # via pytest +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + 
--hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d + # via -r requirements.in +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 + # via black +flake8==4.0.1 \ + --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ + --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d + # via -r requirements.in +iniconfig==1.1.1 \ + --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ + --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 + # via pytest +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 + # via -r requirements.in +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + 
--hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 + # via -r requirements.in +mccabe==0.6.1 \ + --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ + --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f + # via flake8 +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + --hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b + # via -r requirements.in +mypy-extensions==0.4.3 \ + --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ + --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 + # via + # black + # mypy +numpy==1.22.3 \ + --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ + --hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ + --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ + --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ + --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ + --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ + --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ + 
--hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ + --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ + --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ + --hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ + --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ + --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ + --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ + --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 \ + --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ + --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ + --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ + --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ + --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa + # via + # -r requirements.in + # pyarrow +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 + # via pytest +pathspec==0.9.0 \ + --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ + --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 + # via black +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 + # via black +pluggy==1.0.0 \ + --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ + --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 + # via pytest +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 + # via pytest +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + 
--hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb + # via -r requirements.in +pycodestyle==2.8.0 \ + --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ + --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f + # via flake8 +pyflakes==2.4.0 \ + --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ + --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e + # via flake8 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc + # via packaging +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 + # via -r requirements.in +toml==0.10.2 \ + --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ + --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f + # via + # black + # maturin + # mypy + # pytest +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 + # via mypy diff --git a/python/requirements-37.txt b/python/requirements-37.txt index e64bebf32..b1776eed7 100644 --- a/python/requirements-37.txt +++ b/python/requirements-37.txt @@ -4,26 +4,48 @@ # # pip-compile --generate-hashes # -attrs==21.2.0 \ - --hash=sha256:149e90d6d8ac20db7a955ad60cf0e6881a3f20d37096140088356da6c716b0b1 \ - --hash=sha256:ef6aaac3ca6cd92904cdd0d83f629a15f18053ec84e6432106f7a4d04ae4f5fb +attrs==21.4.0 \ + --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ + --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd # via pytest -black==21.9b0 \ - --hash=sha256:380f1b5da05e5a1429225676655dddb96f5ae8c75bdf91e53d798871b902a115 \ - --hash=sha256:7de4cfc7eb6b710de325712d40125689101d21d25283eed7e9998722cf10eb91 +black==22.3.0 \ + --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ + --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ + 
--hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ + --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ + --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ + --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ + --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ + --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ + --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ + --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ + --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ + --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ + --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ + --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ + --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ + --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ + --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ + --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ + --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ + --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ + --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ + --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ + --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d # via -r requirements.in -click==8.0.3 \ - --hash=sha256:353f466495adaeb40b6b5f592f9f91cb22372351c84caeb068132442a4518ef3 \ - --hash=sha256:410e932b050f5eed773c4cda94de75971c89cdb3155a72a0831139a79e5ecb5b +click==8.1.3 \ + --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ + --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 # via black flake8==4.0.1 \ --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d # via -r requirements.in -importlib-metadata==4.2.0 \ +importlib-metadata==4.2.0 ; python_version < "3.8" \ --hash=sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b \ --hash=sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31 # via + # -r requirements.in # click # flake8 # pluggy @@ -32,51 +54,57 @@ iniconfig==1.1.1 \ --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 # via pytest -isort==5.9.3 \ - --hash=sha256:9c2ea1e62d871267b78307fe511c0838ba0da28698c5732d54e2790bf3ba9899 \ - --hash=sha256:e17d6e2b81095c9db0a03a8025a957f334d6ea30b26f9ec70805411e5c7c81f2 +isort==5.10.1 \ + --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ + --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 # via -r requirements.in -maturin==0.11.5 \ - --hash=sha256:07074778b063a439fdfd5501bd1d1823a216ec5b657d3ecde78fd7f2c4782422 \ - --hash=sha256:1ce666c386ff9c3c2b5d7d3ca4b1f9f675c38d7540ffbda0d5d5bc7d6ddde49a \ - --hash=sha256:20f9c30701c9932ed8026ceaf896fc77ecc76cebd6a182668dbc10ed597f8789 \ - 
--hash=sha256:3354d030b88c938a33bf407a6c0f79ccdd2cce3e1e3e4a2d0c92dc2e063adc6e \ - --hash=sha256:4191b0b7362b3025096faf126ff15cb682fbff324ac4a6ca18d55bb16e2b759b \ - --hash=sha256:70381be1585cb9fa5c02b83af80ae661aaad959e8aa0fddcfe195b004054bd69 \ - --hash=sha256:7bf96e7586bfdb5b0fadc6d662534b8a41123b33dff084fa383a81ded0ce5334 \ - --hash=sha256:ab2b3ccf66f5e0f9c3904d215835337b1bd305e79e3bf53b65bbc80a5755e01b \ - --hash=sha256:b0ac45879a7d624b47d72b093ae3370270894c19779f42aad7568a92951c5d47 \ - --hash=sha256:c2ded8b4ef9210d627bb966bc67661b7db259535f6062afe1ce5605406b50f3f \ - --hash=sha256:d78f24561a5e02f7d119b348b26e5772ad5698a43ca49e8facb9ce77cf273714 +maturin==0.12.16 \ + --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ + --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ + --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ + --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ + --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ + --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ + --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ + --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ + --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ + --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ + --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ + --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ + --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ + --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ + --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ + --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ + --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 # via -r requirements.in mccabe==0.6.1 \ --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f # via flake8 -mypy==0.910 \ - --hash=sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9 \ - --hash=sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a \ - --hash=sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9 \ - --hash=sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e \ - --hash=sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2 \ - --hash=sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212 \ - --hash=sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b \ - --hash=sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885 \ - --hash=sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150 \ - --hash=sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703 \ - --hash=sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072 \ - --hash=sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457 \ - --hash=sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e \ - --hash=sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0 \ - 
--hash=sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb \ - --hash=sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97 \ - --hash=sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8 \ - --hash=sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811 \ - --hash=sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6 \ - --hash=sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de \ - --hash=sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504 \ - --hash=sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921 \ - --hash=sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d +mypy==0.950 \ + --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ + --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ + --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ + --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ + --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ + --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ + --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ + --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ + --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ + --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ + --hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ + --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ + --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ + --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ + --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ + --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ + --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ + --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ + --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ + --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ + --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ + --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ + --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b # via -r requirements.in mypy-extensions==0.4.3 \ --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ @@ -84,124 +112,92 @@ mypy-extensions==0.4.3 \ # via # black # mypy -numpy==1.21.3 \ - --hash=sha256:043e83bfc274649c82a6f09836943e4a4aebe5e33656271c7dbf9621dd58b8ec \ - --hash=sha256:160ccc1bed3a8371bf0d760971f09bfe80a3e18646620e9ded0ad159d9749baa \ - --hash=sha256:188031f833bbb623637e66006cf75e933e00e7231f67e2b45cf8189612bb5dc3 \ - --hash=sha256:28f15209fb535dd4c504a7762d3bc440779b0e37d50ed810ced209e5cea60d96 \ - --hash=sha256:29fb3dcd0468b7715f8ce2c0c2d9bbbaf5ae686334951343a41bd8d155c6ea27 \ - --hash=sha256:2a6ee9620061b2a722749b391c0d80a0e2ae97290f1b32e28d5a362e21941ee4 \ - --hash=sha256:300321e3985c968e3ae7fbda187237b225f3ffe6528395a5b7a5407f73cf093e \ - --hash=sha256:32437f0b275c1d09d9c3add782516413e98cd7c09e6baf4715cbce781fc29912 \ - 
--hash=sha256:3c09418a14471c7ae69ba682e2428cae5b4420a766659605566c0fa6987f6b7e \ - --hash=sha256:49c6249260890e05b8111ebfc391ed58b3cb4b33e63197b2ec7f776e45330721 \ - --hash=sha256:4cc9b512e9fb590797474f58b7f6d1f1b654b3a94f4fa8558b48ca8b3cfc97cf \ - --hash=sha256:508b0b513fa1266875524ba8a9ecc27b02ad771fe1704a16314dc1a816a68737 \ - --hash=sha256:50cd26b0cf6664cb3b3dd161ba0a09c9c1343db064e7c69f9f8b551f5104d654 \ - --hash=sha256:5c4193f70f8069550a1788bd0cd3268ab7d3a2b70583dfe3b2e7f421e9aace06 \ - --hash=sha256:5dfe9d6a4c39b8b6edd7990091fea4f852888e41919d0e6722fe78dd421db0eb \ - --hash=sha256:63571bb7897a584ca3249c86dd01c10bcb5fe4296e3568b2e9c1a55356b6410e \ - --hash=sha256:75621882d2230ab77fb6a03d4cbccd2038511491076e7964ef87306623aa5272 \ - --hash=sha256:75eb7cadc8da49302f5b659d40ba4f6d94d5045fbd9569c9d058e77b0514c9e4 \ - --hash=sha256:88a5d6b268e9ad18f3533e184744acdaa2e913b13148160b1152300c949bbb5f \ - --hash=sha256:8a10968963640e75cc0193e1847616ab4c718e83b6938ae74dea44953950f6b7 \ - --hash=sha256:90bec6a86b348b4559b6482e2b684db4a9a7eed1fa054b86115a48d58fbbf62a \ - --hash=sha256:98339aa9911853f131de11010f6dd94c8cec254d3d1f7261528c3b3e3219f139 \ - --hash=sha256:a99a6b067e5190ac6d12005a4d85aa6227c5606fa93211f86b1dafb16233e57d \ - --hash=sha256:bffa2eee3b87376cc6b31eee36d05349571c236d1de1175b804b348dc0941e3f \ - --hash=sha256:c6c2d535a7beb1f8790aaa98fd089ceab2e3dd7ca48aca0af7dc60e6ef93ffe1 \ - --hash=sha256:cc14e7519fab2a4ed87d31f99c31a3796e4e1fe63a86ebdd1c5a1ea78ebd5896 \ - --hash=sha256:dd0482f3fc547f1b1b5d6a8b8e08f63fdc250c58ce688dedd8851e6e26cff0f3 \ - --hash=sha256:dde972a1e11bb7b702ed0e447953e7617723760f420decb97305e66fb4afc54f \ - --hash=sha256:e54af82d68ef8255535a6cdb353f55d6b8cf418a83e2be3569243787a4f4866f \ - --hash=sha256:e606e6316911471c8d9b4618e082635cfe98876007556e89ce03d52ff5e8fcf0 \ - --hash=sha256:f41b018f126aac18583956c54544db437f25c7ee4794bcb23eb38bef8e5e192a \ - --hash=sha256:f8f4625536926a155b80ad2bbff44f8cc59e9f2ad14cdda7acf4c135b4dc8ff2 \ - --hash=sha256:fe52dbe47d9deb69b05084abd4b0df7abb39a3c51957c09f635520abd49b29dd +numpy==1.21.6 \ + --hash=sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac \ + --hash=sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3 \ + --hash=sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6 \ + --hash=sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1 \ + --hash=sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a \ + --hash=sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b \ + --hash=sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470 \ + --hash=sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1 \ + --hash=sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab \ + --hash=sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46 \ + --hash=sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673 \ + --hash=sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7 \ + --hash=sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db \ + --hash=sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e \ + --hash=sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786 \ + --hash=sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552 \ + --hash=sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25 \ + 
--hash=sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6 \ + --hash=sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2 \ + --hash=sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a \ + --hash=sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf \ + --hash=sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f \ + --hash=sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c \ + --hash=sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4 \ + --hash=sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b \ + --hash=sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0 \ + --hash=sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3 \ + --hash=sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656 \ + --hash=sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0 \ + --hash=sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb \ + --hash=sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e # via # -r requirements.in - # pandas # pyarrow -packaging==21.0 \ - --hash=sha256:7dc96269f53a4ccec5c0670940a4281106dd0bb343f47b7471f779df49c2fbe7 \ - --hash=sha256:c86254f9220d55e31cc94d69bade760f0847da8000def4dfe1c6b872fd14ff14 +packaging==21.3 \ + --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ + --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 # via pytest -pandas==1.3.4 \ - --hash=sha256:003ba92db58b71a5f8add604a17a059f3068ef4e8c0c365b088468d0d64935fd \ - --hash=sha256:10e10a2527db79af6e830c3d5842a4d60383b162885270f8cffc15abca4ba4a9 \ - --hash=sha256:22808afb8f96e2269dcc5b846decacb2f526dd0b47baebc63d913bf847317c8f \ - --hash=sha256:2d1dc09c0013d8faa7474574d61b575f9af6257ab95c93dcf33a14fd8d2c1bab \ - --hash=sha256:35c77609acd2e4d517da41bae0c11c70d31c87aae8dd1aabd2670906c6d2c143 \ - --hash=sha256:372d72a3d8a5f2dbaf566a5fa5fa7f230842ac80f29a931fb4b071502cf86b9a \ - --hash=sha256:42493f8ae67918bf129869abea8204df899902287a7f5eaf596c8e54e0ac7ff4 \ - --hash=sha256:5298a733e5bfbb761181fd4672c36d0c627320eb999c59c65156c6a90c7e1b4f \ - --hash=sha256:5ba0aac1397e1d7b654fccf263a4798a9e84ef749866060d19e577e927d66e1b \ - --hash=sha256:a2aa18d3f0b7d538e21932f637fbfe8518d085238b429e4790a35e1e44a96ffc \ - --hash=sha256:a388960f979665b447f0847626e40f99af8cf191bce9dc571d716433130cb3a7 \ - --hash=sha256:a51528192755f7429c5bcc9e80832c517340317c861318fea9cea081b57c9afd \ - --hash=sha256:b528e126c13816a4374e56b7b18bfe91f7a7f6576d1aadba5dee6a87a7f479ae \ - --hash=sha256:c1aa4de4919358c5ef119f6377bc5964b3a7023c23e845d9db7d9016fa0c5b1c \ - --hash=sha256:c2646458e1dce44df9f71a01dc65f7e8fa4307f29e5c0f2f92c97f47a5bf22f5 \ - --hash=sha256:d47750cf07dee6b55d8423471be70d627314277976ff2edd1381f02d52dbadf9 \ - --hash=sha256:d99d2350adb7b6c3f7f8f0e5dfb7d34ff8dd4bc0a53e62c445b7e43e163fce63 \ - --hash=sha256:dd324f8ee05925ee85de0ea3f0d66e1362e8c80799eb4eb04927d32335a3e44a \ - --hash=sha256:eaca36a80acaacb8183930e2e5ad7f71539a66805d6204ea88736570b2876a7b \ - --hash=sha256:f567e972dce3bbc3a8076e0b675273b4a9e8576ac629149cf8286ee13c259ae5 \ - --hash=sha256:fe48e4925455c964db914b958f6e7032d285848b7538a5e1b19aeb26ffaea3ec - # via -r requirements.in pathspec==0.9.0 \ --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 # via 
black -platformdirs==2.4.0 \ - --hash=sha256:367a5e80b3d04d2428ffa76d33f124cf11e8fff2acdaa9b43d545f5c7d661ef2 \ - --hash=sha256:8868bbe3c3c80d42f20156f22e7131d2fb321f5bc86a2a345375c6481a67021d +platformdirs==2.5.2 \ + --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ + --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 # via black pluggy==1.0.0 \ --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 # via pytest -py==1.10.0 \ - --hash=sha256:21b81bda15b66ef5e1a777a21c4dcd9c20ad3efd0b3f817e7a809035269e1bd3 \ - --hash=sha256:3b80836aa6d1feeaa108e046da6423ab8f6ceda6468545ae8d02d9d58d18818a +py==1.11.0 \ + --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ + --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 # via pytest -pyarrow==6.0.0 \ - --hash=sha256:004185e0babc6f3c3fba6ba4f106e406a0113d0f82bb9ad9a8571a1978c45d04 \ - --hash=sha256:0204e80777ab8f4e9abd3a765a8ec07ed1e3c4630bacda50d2ce212ef0f3826f \ - --hash=sha256:072c1a0fca4509eefd7d018b78542fb7e5c63aaf5698f1c0a6e45628ae17ba44 \ - --hash=sha256:15dc0d673d3f865ca63c877bd7a2eced70b0a08969fb733a28247134b8a1f18b \ - --hash=sha256:1c38263ea438a1666b13372e7565450cfeec32dbcd1c2595749476a58465eaec \ - --hash=sha256:281ce5fa03621d786a9beb514abb09846db7f0221b50eabf543caa24037eaacd \ - --hash=sha256:2d2c681659396c745e4f1988d5dd41dcc3ad557bb8d4a8c2e44030edafc08a91 \ - --hash=sha256:376c4b5f248ae63df21fe15c194e9013753164be2d38f4b3fb8bde63ac5a1958 \ - --hash=sha256:465f87fa0be0b2928b2beeba22b5813a0203fb05d90fd8563eea48e08ecc030e \ - --hash=sha256:477c746ef42c039348a288584800e299456c80c5691401bb9b19aa9c02a427b7 \ - --hash=sha256:5144bd9db2920c7cb566c96462d62443cc239104f94771d110f74393f2fb42a2 \ - --hash=sha256:5408fa8d623e66a0445f3fb0e4027fd219bf99bfb57422d543d7b7876e2c5b55 \ - --hash=sha256:5be62679201c441356d3f2a739895dcc8d4d299f2a6eabcd2163bfb6a898abba \ - --hash=sha256:5c666bc6a1cebf01206e2dc1ab05f25f39f35d3a499e0ef5cd635225e07306ca \ - --hash=sha256:6163d82cca7541774b00503c295fe86a1722820eddb958b57f091bb6f5b0a6db \ - --hash=sha256:6a1d9a2f4ee812ed0bd4182cabef99ea914ac297274f0de086f2488093d284ef \ - --hash=sha256:7a683f71b848eb6310b4ec48c0def55dac839e9994c1ac874c9b2d3d5625def1 \ - --hash=sha256:82fe80309e01acf29e3943a1f6d3c98ec109fe1d356bc1ac37d639bcaadcf684 \ - --hash=sha256:8c23f8cdecd3d9e49f9b0f9a651ae5549d1d32fd4901fb1bdc2d327edfba844f \ - --hash=sha256:8d41dfb09ba9236cca6245f33088eb42f3c54023da281139241e0f9f3b4b754e \ - --hash=sha256:a19e58dfb04e451cd8b7bdec3ac8848373b95dfc53492c9a69789aa9074a3c1b \ - --hash=sha256:a50d2f77b86af38ceabf45617208b9105d20e7a5eebc584e7c8c0acededd82ce \ - --hash=sha256:a5bed4f948c032c40597302e9bdfa65f62295240306976ecbe43a54924c6f94f \ - --hash=sha256:ac941a147d14993987cc8b605b721735a34b3e54d167302501fb4db1ad7382c7 \ - --hash=sha256:b86d175262db1eb46afdceb36d459409eb6f8e532d3dec162f8bf572c7f57623 \ - --hash=sha256:bf3400780c4d3c9cb43b1e8a1aaf2e1b7199a0572d0a645529d2784e4d0d8497 \ - --hash=sha256:c7a6e7e0bf8779e9c3428ced85507541f3da9a0675e2f4781d4eb2c7042cbf81 \ - --hash=sha256:cc1d4a70efd583befe92d4ea6f74ed2e0aa31ccdde767cd5cae8e77c65a1c2d4 \ - --hash=sha256:d046dc78a9337baa6415be915c5a16222505233e238a1017f368243c89817eea \ - --hash=sha256:da7860688c33ca88ac05f1a487d32d96d9caa091412496c35f3d1d832145675a \ - --hash=sha256:ddf2e6e3b321adaaf716f2d5af8e92d205a9671e0cb7c0779710a567fd1dd580 \ - 
--hash=sha256:e81508239a71943759cee272ce625ae208092dd36ef2c6713fccee30bbcf52bb \ - --hash=sha256:ea64a48a85c631eb2a0ea13ccdec5143c85b5897836b16331ee4289d27a57247 \ - --hash=sha256:ed0be080cf595ea15ff1c9ff4097bbf1fcc4b50847d98c0a3c0412fbc6ede7e9 \ - --hash=sha256:fb701ec4a94b92102606d4e88f0b8eba34f09a5ad8e014eaa4af76f42b7f62ae \ - --hash=sha256:fbda7595f24a639bcef3419ecfac17216efacb09f7b0f1b4c4c97f900d65ca0e +pyarrow==8.0.0 \ + --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ + --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ + --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ + --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ + --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ + --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ + --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ + --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ + --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ + --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ + --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ + --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ + --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ + --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ + --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ + --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ + --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ + --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ + --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ + --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ + --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ + --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ + --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ + --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ + --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ + --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ + --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ + --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ + --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ + --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb # via -r requirements.in pycodestyle==2.8.0 \ --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ @@ -211,119 +207,62 @@ pyflakes==2.4.0 \ --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e # via flake8 -pyparsing==3.0.3 \ - --hash=sha256:9e3511118010f112a4b4b435ae50e1eaa610cda191acb9e421d60cf5fde83455 \ - --hash=sha256:f8d3fe9fc404576c5164f0f0c4e382c96b85265e023c409c43d48f65da9d60d0 +pyparsing==3.0.9 \ + --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ + 
--hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc # via packaging -pytest==6.2.5 \ - --hash=sha256:131b36680866a76e6781d13f101efb86cf674ebb9762eb70d3082b6f29889e89 \ - --hash=sha256:7310f8d27bc79ced999e760ca304d69f6ba6c6649c0b60fb0e04a4a77cacc134 +pytest==7.1.2 \ + --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ + --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 # via -r requirements.in -python-dateutil==2.8.2 \ - --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ - --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 - # via pandas -pytz==2021.3 \ - --hash=sha256:3672058bc3453457b622aab7a1c3bfd5ab0bdae451512f6cf25f64ed37f5b87c \ - --hash=sha256:acad2d8b20a1af07d4e4c9d2e9285c5ed9104354062f275f3fcd88dcef4f1326 - # via pandas -regex==2021.10.23 \ - --hash=sha256:0c186691a7995ef1db61205e00545bf161fb7b59cdb8c1201c89b333141c438a \ - --hash=sha256:0dcc0e71118be8c69252c207630faf13ca5e1b8583d57012aae191e7d6d28b84 \ - --hash=sha256:0f7552429dd39f70057ac5d0e897e5bfe211629652399a21671e53f2a9693a4e \ - --hash=sha256:129472cd06062fb13e7b4670a102951a3e655e9b91634432cfbdb7810af9d710 \ - --hash=sha256:13ec99df95003f56edcd307db44f06fbeb708c4ccdcf940478067dd62353181e \ - --hash=sha256:1f2b59c28afc53973d22e7bc18428721ee8ca6079becf1b36571c42627321c65 \ - --hash=sha256:2b20f544cbbeffe171911f6ce90388ad36fe3fad26b7c7a35d4762817e9ea69c \ - --hash=sha256:2fb698037c35109d3c2e30f2beb499e5ebae6e4bb8ff2e60c50b9a805a716f79 \ - --hash=sha256:34d870f9f27f2161709054d73646fc9aca49480617a65533fc2b4611c518e455 \ - --hash=sha256:391703a2abf8013d95bae39145d26b4e21531ab82e22f26cd3a181ee2644c234 \ - --hash=sha256:450dc27483548214314640c89a0f275dbc557968ed088da40bde7ef8fb52829e \ - --hash=sha256:45b65d6a275a478ac2cbd7fdbf7cc93c1982d613de4574b56fd6972ceadb8395 \ - --hash=sha256:5095a411c8479e715784a0c9236568ae72509450ee2226b649083730f3fadfc6 \ - --hash=sha256:530fc2bbb3dc1ebb17f70f7b234f90a1dd43b1b489ea38cea7be95fb21cdb5c7 \ - --hash=sha256:56f0c81c44638dfd0e2367df1a331b4ddf2e771366c4b9c5d9a473de75e3e1c7 \ - --hash=sha256:5e9c9e0ce92f27cef79e28e877c6b6988c48b16942258f3bc55d39b5f911df4f \ - --hash=sha256:6d7722136c6ed75caf84e1788df36397efdc5dbadab95e59c2bba82d4d808a4c \ - --hash=sha256:74d071dbe4b53c602edd87a7476ab23015a991374ddb228d941929ad7c8c922e \ - --hash=sha256:7b568809dca44cb75c8ebb260844ea98252c8c88396f9d203f5094e50a70355f \ - --hash=sha256:80bb5d2e92b2258188e7dcae5b188c7bf868eafdf800ea6edd0fbfc029984a88 \ - --hash=sha256:8d1cdcda6bd16268316d5db1038965acf948f2a6f43acc2e0b1641ceab443623 \ - --hash=sha256:9f665677e46c5a4d288ece12fdedf4f4204a422bb28ff05f0e6b08b7447796d1 \ - --hash=sha256:a30513828180264294953cecd942202dfda64e85195ae36c265daf4052af0464 \ - --hash=sha256:a7a986c45d1099a5de766a15de7bee3840b1e0e1a344430926af08e5297cf666 \ - --hash=sha256:a940ca7e7189d23da2bfbb38973832813eab6bd83f3bf89a977668c2f813deae \ - --hash=sha256:ab7c5684ff3538b67df3f93d66bd3369b749087871ae3786e70ef39e601345b0 \ - --hash=sha256:be04739a27be55631069b348dda0c81d8ea9822b5da10b8019b789e42d1fe452 \ - --hash=sha256:c0938ddd60cc04e8f1faf7a14a166ac939aac703745bfcd8e8f20322a7373019 \ - --hash=sha256:cb46b542133999580ffb691baf67410306833ee1e4f58ed06b6a7aaf4e046952 \ - --hash=sha256:d134757a37d8640f3c0abb41f5e68b7cf66c644f54ef1cb0573b7ea1c63e1509 \ - --hash=sha256:de557502c3bec8e634246588a94e82f1ee1b9dfcfdc453267c4fb652ff531570 \ - 
--hash=sha256:ded0c4a3eee56b57fcb2315e40812b173cafe79d2f992d50015f4387445737fa \ - --hash=sha256:e1dae12321b31059a1a72aaa0e6ba30156fe7e633355e445451e4021b8e122b6 \ - --hash=sha256:eb672217f7bd640411cfc69756ce721d00ae600814708d35c930930f18e8029f \ - --hash=sha256:ee684f139c91e69fe09b8e83d18b4d63bf87d9440c1eb2eeb52ee851883b1b29 \ - --hash=sha256:f3f9a91d3cc5e5b0ddf1043c0ae5fa4852f18a1c0050318baf5fc7930ecc1f9c - # via black -six==1.16.0 \ - --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ - --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 - # via python-dateutil toml==0.10.2 \ --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f + # via -r requirements.in +tomli==2.0.1 \ + --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ + --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f # via - # -r requirements.in + # black # maturin # mypy # pytest -tomli==1.2.2 \ - --hash=sha256:c6ce0015eb38820eaf32b5db832dbc26deb3dd427bd5f6556cf0acac2c214fee \ - --hash=sha256:f04066f68f5554911363063a30b108d2b5a5b1a010aa8b6132af78489fe3aade - # via black -typed-ast==1.4.3 \ - --hash=sha256:01ae5f73431d21eead5015997ab41afa53aa1fbe252f9da060be5dad2c730ace \ - --hash=sha256:067a74454df670dcaa4e59349a2e5c81e567d8d65458d480a5b3dfecec08c5ff \ - --hash=sha256:0fb71b8c643187d7492c1f8352f2c15b4c4af3f6338f21681d3681b3dc31a266 \ - --hash=sha256:1b3ead4a96c9101bef08f9f7d1217c096f31667617b58de957f690c92378b528 \ - --hash=sha256:2068531575a125b87a41802130fa7e29f26c09a2833fea68d9a40cf33902eba6 \ - --hash=sha256:209596a4ec71d990d71d5e0d312ac935d86930e6eecff6ccc7007fe54d703808 \ - --hash=sha256:2c726c276d09fc5c414693a2de063f521052d9ea7c240ce553316f70656c84d4 \ - --hash=sha256:398e44cd480f4d2b7ee8d98385ca104e35c81525dd98c519acff1b79bdaac363 \ - --hash=sha256:52b1eb8c83f178ab787f3a4283f68258525f8d70f778a2f6dd54d3b5e5fb4341 \ - --hash=sha256:5feca99c17af94057417d744607b82dd0a664fd5e4ca98061480fd8b14b18d04 \ - --hash=sha256:7538e495704e2ccda9b234b82423a4038f324f3a10c43bc088a1636180f11a41 \ - --hash=sha256:760ad187b1041a154f0e4d0f6aae3e40fdb51d6de16e5c99aedadd9246450e9e \ - --hash=sha256:777a26c84bea6cd934422ac2e3b78863a37017618b6e5c08f92ef69853e765d3 \ - --hash=sha256:95431a26309a21874005845c21118c83991c63ea800dd44843e42a916aec5899 \ - --hash=sha256:9ad2c92ec681e02baf81fdfa056fe0d818645efa9af1f1cd5fd6f1bd2bdfd805 \ - --hash=sha256:9c6d1a54552b5330bc657b7ef0eae25d00ba7ffe85d9ea8ae6540d2197a3788c \ - --hash=sha256:aee0c1256be6c07bd3e1263ff920c325b59849dc95392a05f258bb9b259cf39c \ - --hash=sha256:af3d4a73793725138d6b334d9d247ce7e5f084d96284ed23f22ee626a7b88e39 \ - --hash=sha256:b36b4f3920103a25e1d5d024d155c504080959582b928e91cb608a65c3a49e1a \ - --hash=sha256:b9574c6f03f685070d859e75c7f9eeca02d6933273b5e69572e5ff9d5e3931c3 \ - --hash=sha256:bff6ad71c81b3bba8fa35f0f1921fb24ff4476235a6e94a26ada2e54370e6da7 \ - --hash=sha256:c190f0899e9f9f8b6b7863debfb739abcb21a5c054f911ca3596d12b8a4c4c7f \ - --hash=sha256:c907f561b1e83e93fad565bac5ba9c22d96a54e7ea0267c708bffe863cbe4075 \ - --hash=sha256:cae53c389825d3b46fb37538441f75d6aecc4174f615d048321b716df2757fb0 \ - --hash=sha256:dd4a21253f42b8d2b48410cb31fe501d32f8b9fbeb1f55063ad102fe9c425e40 \ - --hash=sha256:dde816ca9dac1d9c01dd504ea5967821606f02e510438120091b84e852367428 \ - --hash=sha256:f2362f3cb0f3172c42938946dbc5b7843c2a28aec307c49100c8b38764eb6927 \ - 
--hash=sha256:f328adcfebed9f11301eaedfa48e15bdece9b519fb27e6a8c01aa52a17ec31b3 \ - --hash=sha256:f8afcf15cc511ada719a88e013cec87c11aff7b91f019295eb4530f96fe5ef2f \ - --hash=sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65 +typed-ast==1.5.3 \ + --hash=sha256:20d5118e494478ef2d3a2702d964dae830aedd7b4d3b626d003eea526be18718 \ + --hash=sha256:27e46cdd01d6c3a0dd8f728b6a938a6751f7bd324817501c15fb056307f918c6 \ + --hash=sha256:27f25232e2dd0edfe1f019d6bfaaf11e86e657d9bdb7b0956db95f560cceb2b3 \ + --hash=sha256:3042bfc9ca118712c9809201f55355479cfcdc17449f9f8db5e744e9625c6805 \ + --hash=sha256:37e5349d1d5de2f4763d534ccb26809d1c24b180a477659a12c4bde9dd677d74 \ + --hash=sha256:4fff9fdcce59dc61ec1b317bdb319f8f4e6b69ebbe61193ae0a60c5f9333dc49 \ + --hash=sha256:542cd732351ba8235f20faa0fc7398946fe1a57f2cdb289e5497e1e7f48cfedb \ + --hash=sha256:5dc2c11ae59003d4a26dda637222d9ae924387f96acae9492df663843aefad55 \ + --hash=sha256:8831479695eadc8b5ffed06fdfb3e424adc37962a75925668deeb503f446c0a3 \ + --hash=sha256:8cdf91b0c466a6c43f36c1964772918a2c04cfa83df8001ff32a89e357f8eb06 \ + --hash=sha256:8e0b8528838ffd426fea8d18bde4c73bcb4167218998cc8b9ee0a0f2bfe678a6 \ + --hash=sha256:8ef1d96ad05a291f5c36895d86d1375c0ee70595b90f6bb5f5fdbee749b146db \ + --hash=sha256:9ad3b48cf2b487be140072fb86feff36801487d4abb7382bb1929aaac80638ea \ + --hash=sha256:9cc9e1457e1feb06b075c8ef8aeb046a28ec351b1958b42c7c31c989c841403a \ + --hash=sha256:9e237e74fd321a55c90eee9bc5d44be976979ad38a29bbd734148295c1ce7617 \ + --hash=sha256:c9f1a27592fac87daa4e3f16538713d705599b0a27dfe25518b80b6b017f0a6d \ + --hash=sha256:d64dabc6336ddc10373922a146fa2256043b3b43e61f28961caec2a5207c56d5 \ + --hash=sha256:e20d196815eeffb3d76b75223e8ffed124e65ee62097e4e73afb5fec6b993e7a \ + --hash=sha256:e34f9b9e61333ecb0f7d79c21c28aa5cd63bec15cb7e1310d7d3da6ce886bc9b \ + --hash=sha256:ed44e81517364cb5ba367e4f68fca01fba42a7a4690d40c07886586ac267d9b9 \ + --hash=sha256:ee852185964744987609b40aee1d2eb81502ae63ee8eef614558f96a56c1902d \ + --hash=sha256:f60d9de0d087454c91b3999a296d0c4558c1666771e3460621875021bf899af9 \ + --hash=sha256:f818c5b81966d4728fec14caa338e30a70dfc3da577984d38f97816c4b3071ec \ + --hash=sha256:fd5df1313915dbd70eaaa88c19030b441742e8b05e6103c631c83b75e0435ccc # via # black # mypy -typing-extensions==3.10.0.2 \ - --hash=sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e \ - --hash=sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7 \ - --hash=sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34 +typing-extensions==4.2.0 \ + --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ + --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 # via # black # importlib-metadata # mypy -zipp==3.6.0 \ - --hash=sha256:71c644c5369f4a6e07636f0aa966270449561fcea2e3d6747b8d23efaa9d7832 \ - --hash=sha256:9fe5ea21568a0a70e50f273397638d39b03353731e6cbbb3fd8502a33fec40bc +zipp==3.8.0 \ + --hash=sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad \ + --hash=sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099 # via importlib-metadata diff --git a/python/requirements.in b/python/requirements.in index 7e54705fc..7ee6a48dc 100644 --- a/python/requirements.in +++ b/python/requirements.in @@ -21,7 +21,7 @@ isort maturin mypy numpy -pandas pyarrow pytest toml +importlib_metadata; python_version < "3.8" diff --git a/python/src/ballista_context.rs b/python/src/ballista_context.rs new file mode 100644 index 
000000000..e3bf21923 --- /dev/null +++ b/python/src/ballista_context.rs @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; +use std::path::PathBuf; + +use crate::errors::BallistaError; +use crate::utils::wait_for_future; + +use crate::dataframe::PyDataFrame; +use ballista::prelude::*; +use datafusion::arrow::datatypes::Schema; +use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions}; + +/// `PyBallistaContext` is able to plan and execute DataFusion plans. +/// It has a powerful optimizer, a physical planner for local execution, and a +/// multi-threaded execution engine to perform the execution. +#[pyclass(name = "BallistaContext", module = "ballista", subclass, unsendable)] +pub(crate) struct PyBallistaContext { + ctx: BallistaContext, +} + +#[pymethods] +impl PyBallistaContext { + #[new] + #[args(port = "50050")] + fn new(py: Python, host: &str, port: u16) -> PyResult { + let config = BallistaConfig::builder() + .set("ballista.shuffle.partitions", "4") + .build() + .map_err(BallistaError::from)?; + + let result = BallistaContext::remote(host, port, &config); + let ctx = wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(PyBallistaContext { ctx }) + } + + /// Returns a PyDataFrame whose plan corresponds to the SQL statement. 
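As a rough usage sketch of the constructor above: it builds a BallistaConfig with ballista.shuffle.partitions set to "4" and then calls BallistaContext::remote(host, port, &config). From Python that would look roughly like the lines below, assuming the wheel is installed, that ballista/__init__.py re-exports the class registered under the name "BallistaContext", and that a scheduler is reachable at the placeholder address used here.

    from ballista import BallistaContext

    # Placeholder address; assumes a Ballista scheduler is already running.
    # The port defaults to 50050 via #[args(port = "50050")].
    ctx = BallistaContext("localhost", 50050)
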
+ fn sql(&mut self, query: &str, py: Python) -> PyResult { + let ctx = &self.ctx; + + let result = ctx.sql(query); + let df = wait_for_future(py, result).map_err(BallistaError::from)?; + Ok(PyDataFrame::new(df)) + } + + #[allow(clippy::too_many_arguments)] + #[args( + schema = "None", + has_header = "true", + delimiter = "\",\"", + schema_infer_max_records = "1000", + file_extension = "\".csv\"" + )] + fn register_csv( + &mut self, + name: &str, + path: PathBuf, + schema: Option, + has_header: bool, + delimiter: &str, + schema_infer_max_records: usize, + file_extension: &str, + py: Python, + ) -> PyResult<()> { + let ctx = &self.ctx; + + let path = path + .to_str() + .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; + + let delimiter = delimiter.as_bytes(); + if delimiter.len() != 1 { + return Err(PyValueError::new_err( + "Delimiter must be a single character", + )); + } + + let mut options = CsvReadOptions::new() + .has_header(has_header) + .delimiter(delimiter[0]) + .schema_infer_max_records(schema_infer_max_records) + .file_extension(file_extension); + options.schema = schema.as_ref(); + + let result = ctx.register_csv(name, path, options); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } + + fn register_avro(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let ctx = &self.ctx; + + let result = ctx.register_avro(name, path, AvroReadOptions::default()); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } + + fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { + let ctx = &self.ctx; + + let result = ctx.register_parquet(name, path, ParquetReadOptions::default()); + wait_for_future(py, result).map_err(BallistaError::from)?; + + Ok(()) + } +} diff --git a/python/src/dataframe.rs b/python/src/dataframe.rs index 9050df92e..cff1733fb 100644 --- a/python/src/dataframe.rs +++ b/python/src/dataframe.rs @@ -15,42 +15,67 @@ // specific language governing permissions and limitations // under the License. -use std::sync::Arc; - -use pyo3::prelude::*; - +use crate::errors::DataFusionError; +use crate::expression::PyExpr; +use crate::utils::wait_for_future; use datafusion::arrow::datatypes::Schema; use datafusion::arrow::pyarrow::PyArrowConvert; use datafusion::arrow::util::pretty; use datafusion::dataframe::DataFrame; use datafusion::logical_plan::JoinType; - -use crate::utils::wait_for_future; -use crate::{errors::DataFusionError, expression::PyExpr}; +use pyo3::exceptions::PyTypeError; +use pyo3::prelude::*; +use pyo3::types::PyTuple; +use std::sync::Arc; /// A PyDataFrame is a representation of a logical plan and an API to compose statements. /// Use it to build a plan and `.collect()` to execute the plan and collect the result. /// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment. 
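Building on that sketch, the registration methods and sql() above would typically be driven from Python along these lines; the table names and file paths are hypothetical, and the keyword defaults mirror the #[args(...)] attributes (has_header = true, delimiter = ",", file_extension = ".csv").

    # register_csv checks that the delimiter is a single character before
    # building CsvReadOptions; schema inference is capped by
    # schema_infer_max_records (default 1000).
    ctx.register_csv("sales", "/tmp/sales.csv", has_header=True, delimiter=",")
    ctx.register_parquet("items", "/tmp/items.parquet")
    ctx.register_avro("events", "/tmp/events.avro")

    # sql() returns the DataFrame wrapper defined below; per its doc comment,
    # .collect() executes the plan and gathers the result batches.
    df = ctx.sql("SELECT item_id, SUM(amount) AS total FROM sales GROUP BY item_id")
    batches = df.collect()

The single-character check is also why passing a longer delimiter surfaces as a ValueError on the Python side.
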
-#[pyclass(name = "DataFrame", module = "datafusion", subclass)] +#[pyclass(name = "DataFrame", module = "ballista", subclass)] #[derive(Clone)] pub(crate) struct PyDataFrame { - df: Arc, + df: Arc, } impl PyDataFrame { /// creates a new PyDataFrame - pub fn new(df: Arc) -> Self { + pub fn new(df: Arc) -> Self { Self { df } } } #[pymethods] impl PyDataFrame { + fn __getitem__(&self, key: PyObject) -> PyResult { + Python::with_gil(|py| { + if let Ok(key) = key.extract::<&str>(py) { + self.select_columns(vec![key]) + } else if let Ok(tuple) = key.extract::<&PyTuple>(py) { + let keys = tuple + .iter() + .map(|item| item.extract::<&str>()) + .collect::>>()?; + self.select_columns(keys) + } else if let Ok(keys) = key.extract::>(py) { + self.select_columns(keys) + } else { + let message = "DataFrame can only be indexed by string index or indices"; + Err(PyTypeError::new_err(message)) + } + }) + } + /// Returns the schema from the logical plan fn schema(&self) -> Schema { self.df.schema().into() } + #[args(args = "*")] + fn select_columns(&self, args: Vec<&str>) -> PyResult { + let df = self.df.select_columns(&args)?; + Ok(Self::new(df)) + } + #[args(args = "*")] fn select(&self, args: Vec) -> PyResult { let expr = args.into_iter().map(|e| e.into()).collect(); @@ -124,7 +149,15 @@ impl PyDataFrame { let df = self .df - .join(right.df, join_type, &join_keys.0, &join_keys.1)?; + .join(right.df, join_type, &join_keys.0, &join_keys.1, None)?; Ok(Self::new(df)) } + + /// Print the query plan + #[args(verbose = false, analyze = false)] + fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> { + let df = self.df.explain(verbose, analyze)?; + let batches = wait_for_future(py, df.collect())?; + Ok(pretty::print_batches(&batches)?) + } } diff --git a/python/src/errors.rs b/python/src/errors.rs index 655ed8441..7aecbed90 100644 --- a/python/src/errors.rs +++ b/python/src/errors.rs @@ -17,6 +17,7 @@ use core::fmt; +use ballista::prelude::BallistaError as InnerBallistaError; use datafusion::arrow::error::ArrowError; use datafusion::error::DataFusionError as InnerDataFusionError; use pyo3::{exceptions::PyException, PyErr}; @@ -55,3 +56,40 @@ impl From for PyErr { PyException::new_err(err.to_string()) } } + +impl From for BallistaError { + fn from(err: InnerDataFusionError) -> BallistaError { + BallistaError::DataFusionExecutionError(err) + } +} + +impl From for BallistaError { + fn from(err: InnerBallistaError) -> BallistaError { + BallistaError::ExecutionError(err) + } +} + +impl fmt::Display for BallistaError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + BallistaError::DataFusionExecutionError(e) => write!(f, "Datafusion error: {:?}", e), + BallistaError::ExecutionError(e) => write!(f, "Ballista error: {:?}", e), + BallistaError::ArrowError(e) => write!(f, "Arrow error: {:?}", e), + BallistaError::Common(e) => write!(f, "{}", e), + } + } +} + +#[derive(Debug)] +pub enum BallistaError { + DataFusionExecutionError(InnerDataFusionError), + ExecutionError(InnerBallistaError), + ArrowError(ArrowError), + Common(String), +} + +impl From for PyErr { + fn from(err: BallistaError) -> PyErr { + PyException::new_err(err.to_string()) + } +} diff --git a/python/src/expression.rs b/python/src/expression.rs index d646d6b58..b3275ccf0 100644 --- a/python/src/expression.rs +++ b/python/src/expression.rs @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-use pyo3::PyMappingProtocol; -use pyo3::{basic::CompareOp, prelude::*, PyNumberProtocol, PyObjectProtocol}; +use pyo3::{basic::CompareOp, prelude::*}; use std::convert::{From, Into}; use datafusion::arrow::datatypes::DataType; @@ -25,7 +24,7 @@ use datafusion::logical_plan::{col, lit, Expr}; use datafusion::scalar::ScalarValue; /// An PyExpr that can be used on a DataFrame -#[pyclass(name = "Expression", module = "datafusion", subclass)] +#[pyclass(name = "Expression", module = "ballista", subclass)] #[derive(Debug, Clone)] pub(crate) struct PyExpr { pub(crate) expr: Expr, @@ -37,68 +36,70 @@ impl From for Expr { } } -impl Into for Expr { - fn into(self) -> PyExpr { - PyExpr { expr: self } +impl From for PyExpr { + fn from(expr: Expr) -> PyExpr { + PyExpr { expr } } } -#[pyproto] -impl PyNumberProtocol for PyExpr { - fn __add__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok((lhs.expr + rhs.expr).into()) +#[pymethods] +impl PyExpr { + fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { + let expr = match op { + CompareOp::Lt => self.expr.clone().lt(other.expr), + CompareOp::Le => self.expr.clone().lt_eq(other.expr), + CompareOp::Eq => self.expr.clone().eq(other.expr), + CompareOp::Ne => self.expr.clone().not_eq(other.expr), + CompareOp::Gt => self.expr.clone().gt(other.expr), + CompareOp::Ge => self.expr.clone().gt_eq(other.expr), + }; + expr.into() } - fn __sub__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok((lhs.expr - rhs.expr).into()) + fn __str__(&self) -> PyResult { + Ok(format!("{}", self.expr)) } - fn __truediv__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok((lhs.expr / rhs.expr).into()) + fn __add__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() + rhs.expr).into()) } - fn __mul__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok((lhs.expr * rhs.expr).into()) + fn __sub__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() - rhs.expr).into()) } - fn __mod__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok(lhs.expr.clone().modulus(rhs.expr).into()) + fn __truediv__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() / rhs.expr).into()) } - fn __and__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok(lhs.expr.clone().and(rhs.expr).into()) + fn __mul__(&self, rhs: PyExpr) -> PyResult { + Ok((self.expr.clone() * rhs.expr).into()) } - fn __or__(lhs: PyExpr, rhs: PyExpr) -> PyResult { - Ok(lhs.expr.clone().or(rhs.expr).into()) + fn __mod__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().modulus(rhs.expr).into()) } - fn __invert__(&self) -> PyResult { - Ok(self.expr.clone().not().into()) + fn __and__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().and(rhs.expr).into()) } -} -#[pyproto] -impl PyObjectProtocol for PyExpr { - fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr { - let expr = match op { - CompareOp::Lt => self.expr.clone().lt(other.expr), - CompareOp::Le => self.expr.clone().lt_eq(other.expr), - CompareOp::Eq => self.expr.clone().eq(other.expr), - CompareOp::Ne => self.expr.clone().not_eq(other.expr), - CompareOp::Gt => self.expr.clone().gt(other.expr), - CompareOp::Ge => self.expr.clone().gt_eq(other.expr), - }; - expr.into() + fn __or__(&self, rhs: PyExpr) -> PyResult { + Ok(self.expr.clone().or(rhs.expr).into()) } - fn __str__(&self) -> PyResult { - Ok(format!("{}", self.expr)) + fn __invert__(&self) -> PyResult { + Ok(self.expr.clone().not().into()) + } + + fn __getitem__(&self, key: &str) -> PyResult { + Ok(Expr::GetIndexedField { + expr: Box::new(self.expr.clone()), + key: ScalarValue::Utf8(Some(key.to_string())), + } + 
.into()) } -} -#[pymethods] -impl PyExpr { #[staticmethod] pub fn literal(value: ScalarValue) -> PyExpr { lit(value).into() @@ -134,14 +135,3 @@ impl PyExpr { expr.into() } } - -#[pyproto] -impl PyMappingProtocol for PyExpr { - fn __getitem__(&self, key: &str) -> PyResult { - Ok(Expr::GetIndexedField { - expr: Box::new(self.expr.clone()), - key: ScalarValue::Utf8(Some(key.to_string()).to_owned()), - } - .into()) - } -} diff --git a/python/src/functions.rs b/python/src/functions.rs index c0b4e5989..1a2c4ed73 100644 --- a/python/src/functions.rs +++ b/python/src/functions.rs @@ -18,10 +18,8 @@ use pyo3::{prelude::*, wrap_pyfunction}; use datafusion::logical_plan; - -use datafusion::physical_plan::{ - aggregates::AggregateFunction, functions::BuiltinScalarFunction, -}; +use datafusion::physical_plan::aggregates::AggregateFunction; +use datafusion_expr::BuiltinScalarFunction; use crate::errors; use crate::expression::PyExpr; @@ -88,11 +86,7 @@ fn concat_ws(sep: String, args: Vec) -> PyResult { /// Creates a new Sort expression #[pyfunction] -fn order_by( - expr: PyExpr, - asc: Option, - nulls_first: Option, -) -> PyResult { +fn order_by(expr: PyExpr, asc: Option, nulls_first: Option) -> PyResult { Ok(PyExpr { expr: datafusion::logical_plan::Expr::Sort { expr: Box::new(expr.expr), @@ -106,10 +100,7 @@ fn order_by( #[pyfunction] fn alias(expr: PyExpr, name: &str) -> PyResult { Ok(PyExpr { - expr: datafusion::logical_plan::Expr::Alias( - Box::new(expr.expr), - String::from(name), - ), + expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)), }) } @@ -122,19 +113,19 @@ fn window( order_by: Option>, ) -> PyResult { use std::str::FromStr; - let fun = datafusion::physical_plan::window_functions::WindowFunction::from_str(name) + let fun = datafusion_expr::window_function::WindowFunction::from_str(name) .map_err(|e| -> errors::DataFusionError { e.into() })?; Ok(PyExpr { expr: datafusion::logical_plan::Expr::WindowFunction { fun, args: args.into_iter().map(|x| x.expr).collect::>(), partition_by: partition_by - .unwrap_or(vec![]) + .unwrap_or_default() .into_iter() .map(|x| x.expr) .collect::>(), order_by: order_by - .unwrap_or(vec![]) + .unwrap_or_default() .into_iter() .map(|x| x.expr) .collect::>(), @@ -244,7 +235,11 @@ scalar_function!(sha384, SHA384); scalar_function!(sha512, SHA512); scalar_function!(signum, Signum); scalar_function!(sin, Sin); -scalar_function!(split_part, SplitPart, "Splits string at occurrences of delimiter and returns the n'th field (counting from one)."); +scalar_function!( + split_part, + SplitPart, + "Splits string at occurrences of delimiter and returns the n'th field (counting from one)." +); scalar_function!(sqrt, Sqrt); scalar_function!( starts_with, diff --git a/python/src/lib.rs b/python/src/lib.rs index d40bae251..70400f3f9 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -15,17 +15,18 @@ // specific language governing permissions and limitations // under the License. +use mimalloc::MiMalloc; use pyo3::prelude::*; -mod catalog; -mod context; +mod ballista_context; mod dataframe; -mod errors; +pub mod errors; mod expression; mod functions; -mod udaf; -mod udf; -mod utils; +pub mod utils; + +#[global_allocator] +static GLOBAL: MiMalloc = MiMalloc; /// Low-level DataFusion internal package. 
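Since the operator overloads in expression.rs now live on #[pymethods] rather than the removed #[pyproto] traits, expression building from Python keeps its operator syntax. A hedged sketch follows, assuming column/literal constructors and the alias helper from functions.rs are surfaced on the Python side roughly as shown; the functions import path and the f.col / f.lit names are assumptions for illustration, not taken from this patch.

    from ballista import functions as f  # assumed re-export of the submodule

    # f.col / f.lit are stand-ins for however column and literal expressions
    # are exposed. Comparison, arithmetic and boolean operators map onto
    # __richcmp__, __add__/__mul__/..., and __and__/__or__/__invert__.
    big = (f.col("amount") * f.lit(1.1)) > f.lit(100.0)

    # __getitem__ on an expression builds Expr::GetIndexedField, e.g. to pull
    # a key out of a struct-typed column.
    color = f.col("properties")["color"]

    df2 = df.select(f.alias(big, "is_large"), color)
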
/// @@ -34,14 +35,9 @@ mod utils; #[pymodule] fn _internal(py: Python, m: &PyModule) -> PyResult<()> { // Register the python classes - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; - m.add_class::()?; - m.add_class::()?; // Register the functions as a submodule let funcs = PyModule::new(py, "functions")?; diff --git a/python/src/utils.rs b/python/src/utils.rs index c8e1c63b1..795058bc9 100644 --- a/python/src/utils.rs +++ b/python/src/utils.rs @@ -15,17 +15,12 @@ // specific language governing permissions and limitations // under the License. -use std::future::Future; - use pyo3::prelude::*; +use std::future::Future; use tokio::runtime::Runtime; -use datafusion::physical_plan::functions::Volatility; - -use crate::errors::DataFusionError; - /// Utility to collect rust futures with GIL released -pub(crate) fn wait_for_future(py: Python, f: F) -> F::Output +pub fn wait_for_future(py: Python, f: F) -> F::Output where F: Send, F::Output: Send, @@ -33,18 +28,3 @@ where let rt = Runtime::new().unwrap(); py.allow_threads(|| rt.block_on(f)) } - -pub(crate) fn parse_volatility(value: &str) -> Result { - Ok(match value { - "immutable" => Volatility::Immutable, - "stable" => Volatility::Stable, - "volatile" => Volatility::Volatile, - value => { - return Err(DataFusionError::Common(format!( - "Unsupportad volatility type: `{}`, supported \ - values are: immutable, stable and volatile.", - value - ))) - } - }) -} From 638f147d2f6ffa300cf83898c75caf89ca191312 Mon Sep 17 00:00:00 2001 From: Remco Verhoef Date: Wed, 8 Jun 2022 20:10:14 +0200 Subject: [PATCH 5/5] remove ballista-python --- ballista-python/.dockerignore | 19 - ballista-python/.gitignore | 173 -- ballista-python/CHANGELOG.md | 86 - ballista-python/Cargo.lock | 1607 ----------------- ballista-python/Cargo.toml | 51 - ballista-python/LICENSE.txt | 202 --- ballista-python/README.md | 105 -- ballista-python/ballista/__init__.py | 120 -- ballista-python/ballista/functions.py | 23 - ballista-python/ballista/tests/__init__.py | 16 - ballista-python/ballista/tests/conftest.py | 33 - ballista-python/ballista/tests/generic.py | 87 - .../ballista/tests/test_aggregation.py | 48 - .../ballista/tests/test_catalog.py | 40 - .../ballista/tests/test_context.py | 74 - .../ballista/tests/test_dataframe.py | 199 -- .../ballista/tests/test_functions.py | 219 --- .../ballista/tests/test_imports.py | 69 - .../ballista/tests/test_indexing.py | 55 - ballista-python/ballista/tests/test_sql.py | 245 --- ballista-python/ballista/tests/test_udaf.py | 136 -- ballista-python/dev/create_license.py | 252 --- ballista-python/pyproject.toml | 58 - ballista-python/requirements-310.txt | 213 --- ballista-python/requirements-37.txt | 268 --- ballista-python/requirements.in | 27 - ballista-python/src/ballista_context.rs | 125 -- ballista-python/src/dataframe.rs | 163 -- ballista-python/src/errors.rs | 95 - ballista-python/src/expression.rs | 137 -- ballista-python/src/functions.rs | 338 ---- ballista-python/src/lib.rs | 48 - ballista-python/src/utils.rs | 30 - 33 files changed, 5361 deletions(-) delete mode 100644 ballista-python/.dockerignore delete mode 100644 ballista-python/.gitignore delete mode 100644 ballista-python/CHANGELOG.md delete mode 100644 ballista-python/Cargo.lock delete mode 100644 ballista-python/Cargo.toml delete mode 100644 ballista-python/LICENSE.txt delete mode 100644 ballista-python/README.md delete mode 100644 ballista-python/ballista/__init__.py delete mode 100644 
ballista-python/ballista/functions.py delete mode 100644 ballista-python/ballista/tests/__init__.py delete mode 100644 ballista-python/ballista/tests/conftest.py delete mode 100644 ballista-python/ballista/tests/generic.py delete mode 100644 ballista-python/ballista/tests/test_aggregation.py delete mode 100644 ballista-python/ballista/tests/test_catalog.py delete mode 100644 ballista-python/ballista/tests/test_context.py delete mode 100644 ballista-python/ballista/tests/test_dataframe.py delete mode 100644 ballista-python/ballista/tests/test_functions.py delete mode 100644 ballista-python/ballista/tests/test_imports.py delete mode 100644 ballista-python/ballista/tests/test_indexing.py delete mode 100644 ballista-python/ballista/tests/test_sql.py delete mode 100644 ballista-python/ballista/tests/test_udaf.py delete mode 100644 ballista-python/dev/create_license.py delete mode 100644 ballista-python/pyproject.toml delete mode 100644 ballista-python/requirements-310.txt delete mode 100644 ballista-python/requirements-37.txt delete mode 100644 ballista-python/requirements.in delete mode 100644 ballista-python/src/ballista_context.rs delete mode 100644 ballista-python/src/dataframe.rs delete mode 100644 ballista-python/src/errors.rs delete mode 100644 ballista-python/src/expression.rs delete mode 100644 ballista-python/src/functions.rs delete mode 100644 ballista-python/src/lib.rs delete mode 100644 ballista-python/src/utils.rs diff --git a/ballista-python/.dockerignore b/ballista-python/.dockerignore deleted file mode 100644 index 08c131c2e..000000000 --- a/ballista-python/.dockerignore +++ /dev/null @@ -1,19 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -target -venv diff --git a/ballista-python/.gitignore b/ballista-python/.gitignore deleted file mode 100644 index 050f1edbf..000000000 --- a/ballista-python/.gitignore +++ /dev/null @@ -1,173 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- - -# Created by https://www.toptal.com/developers/gitignore/api/python,rust -# Edit at https://www.toptal.com/developers/gitignore?templates=python,rust - -### Python ### -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -cover/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -.pybuilder/ -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -# For a library or package, you might want to ignore these files since the code is -# intended to run in multiple environments; otherwise, check them in: -# .python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# pytype static type analyzer -.pytype/ - -# Cython debug symbols -cython_debug/ - -### Rust ### -# Generated by Cargo -# will have compiled files and executables -debug/ - -# These are backup files generated by rustfmt -**/*.rs.bk - -# MSVC Windows builds of rustc generate these, which store debugging information -*.pdb - -# End of https://www.toptal.com/developers/gitignore/api/python,rust diff --git a/ballista-python/CHANGELOG.md b/ballista-python/CHANGELOG.md deleted file mode 100644 index a27ad0adb..000000000 --- a/ballista-python/CHANGELOG.md +++ /dev/null @@ -1,86 +0,0 @@ - - -# Changelog - -## [Unreleased](https://github.com/datafusion-contrib/datafusion-python/tree/HEAD) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1...HEAD) - -**Merged pull requests:** - -- use \_\_getitem\_\_ for df column selection [\#41](https://github.com/datafusion-contrib/datafusion-python/pull/41) ([Jimexist](https://github.com/Jimexist)) -- fix demo in readme [\#40](https://github.com/datafusion-contrib/datafusion-python/pull/40) ([Jimexist](https://github.com/Jimexist)) -- Implement select_columns [\#39](https://github.com/datafusion-contrib/datafusion-python/pull/39) ([andygrove](https://github.com/andygrove)) -- update readme and changelog [\#38](https://github.com/datafusion-contrib/datafusion-python/pull/38) ([Jimexist](https://github.com/Jimexist)) -- Add PyDataFrame.explain [\#36](https://github.com/datafusion-contrib/datafusion-python/pull/36) ([andygrove](https://github.com/andygrove)) -- Release 0.5.0 [\#34](https://github.com/datafusion-contrib/datafusion-python/pull/34) ([Jimexist](https://github.com/Jimexist)) -- disable nightly in workflow [\#33](https://github.com/datafusion-contrib/datafusion-python/pull/33) ([Jimexist](https://github.com/Jimexist)) -- update requirements to 37 and 310, update readme [\#32](https://github.com/datafusion-contrib/datafusion-python/pull/32) ([Jimexist](https://github.com/Jimexist)) -- Add custom global allocator [\#30](https://github.com/datafusion-contrib/datafusion-python/pull/30) ([matthewmturner](https://github.com/matthewmturner)) -- Remove pandas dependency [\#25](https://github.com/datafusion-contrib/datafusion-python/pull/25) ([matthewmturner](https://github.com/matthewmturner)) -- upgrade datafusion and pyo3 [\#20](https://github.com/datafusion-contrib/datafusion-python/pull/20) ([Jimexist](https://github.com/Jimexist)) -- update maturin 0.12+ [\#17](https://github.com/datafusion-contrib/datafusion-python/pull/17) ([Jimexist](https://github.com/Jimexist)) -- Update README.md [\#16](https://github.com/datafusion-contrib/datafusion-python/pull/16) ([Jimexist](https://github.com/Jimexist)) -- apply cargo clippy --fix [\#15](https://github.com/datafusion-contrib/datafusion-python/pull/15) ([Jimexist](https://github.com/Jimexist)) -- update test 
workflow to include rust clippy and check [\#14](https://github.com/datafusion-contrib/datafusion-python/pull/14) ([Jimexist](https://github.com/Jimexist)) -- use maturin 0.12.6 [\#13](https://github.com/datafusion-contrib/datafusion-python/pull/13) ([Jimexist](https://github.com/Jimexist)) -- apply cargo fmt [\#12](https://github.com/datafusion-contrib/datafusion-python/pull/12) ([Jimexist](https://github.com/Jimexist)) -- use stable not nightly [\#11](https://github.com/datafusion-contrib/datafusion-python/pull/11) ([Jimexist](https://github.com/Jimexist)) -- ci: test against more compilers, setup clippy and fix clippy lints [\#9](https://github.com/datafusion-contrib/datafusion-python/pull/9) ([cpcloud](https://github.com/cpcloud)) -- Fix use of importlib.metadata and unify requirements.txt [\#8](https://github.com/datafusion-contrib/datafusion-python/pull/8) ([cpcloud](https://github.com/cpcloud)) -- Ship the Cargo.lock file in the source distribution [\#7](https://github.com/datafusion-contrib/datafusion-python/pull/7) ([cpcloud](https://github.com/cpcloud)) -- add \_\_version\_\_ attribute to datafusion object [\#3](https://github.com/datafusion-contrib/datafusion-python/pull/3) ([tfeda](https://github.com/tfeda)) -- fix ci by fixing directories [\#2](https://github.com/datafusion-contrib/datafusion-python/pull/2) ([Jimexist](https://github.com/Jimexist)) -- setup workflow [\#1](https://github.com/datafusion-contrib/datafusion-python/pull/1) ([Jimexist](https://github.com/Jimexist)) - -## [0.5.1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.1-rc1...0.5.1) - -## [0.5.1-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.1-rc1) (2022-03-15) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0...0.5.1-rc1) - -## [0.5.0](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc2...0.5.0) - -## [0.5.0-rc2](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc2) (2022-03-10) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/0.5.0-rc1...0.5.0-rc2) - -**Closed issues:** - -- Add support for Ballista [\#37](https://github.com/datafusion-contrib/datafusion-python/issues/37) -- Implement DataFrame.explain [\#35](https://github.com/datafusion-contrib/datafusion-python/issues/35) - -## [0.5.0-rc1](https://github.com/datafusion-contrib/datafusion-python/tree/0.5.0-rc1) (2022-03-09) - -[Full Changelog](https://github.com/datafusion-contrib/datafusion-python/compare/4c98b8e9c3c3f8e2e6a8f2d1ffcfefda344c4680...0.5.0-rc1) - -**Closed issues:** - -- Investigate exposing additional optimizations [\#28](https://github.com/datafusion-contrib/datafusion-python/issues/28) -- Use custom allocator in Python build [\#27](https://github.com/datafusion-contrib/datafusion-python/issues/27) -- Why is pandas a requirement? 
[\#24](https://github.com/datafusion-contrib/datafusion-python/issues/24) -- Unable to build [\#18](https://github.com/datafusion-contrib/datafusion-python/issues/18) -- Setup CI against multiple Python version [\#6](https://github.com/datafusion-contrib/datafusion-python/issues/6) - -\* _This Changelog was automatically generated by [github_changelog_generator](https://github.com/github-changelog-generator/github-changelog-generator)_ diff --git a/ballista-python/Cargo.lock b/ballista-python/Cargo.lock deleted file mode 100644 index e71932b3b..000000000 --- a/ballista-python/Cargo.lock +++ /dev/null @@ -1,1607 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.6", - "once_cell", - "version_check", -] - -[[package]] -name = "aho-corasick" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloc-no-stdlib" -version = "2.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" -dependencies = [ - "alloc-no-stdlib", -] - -[[package]] -name = "arrayref" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" - -[[package]] -name = "arrayvec" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" - -[[package]] -name = "arrow" -version = "13.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6bee230122beb516ead31935a61f683715f987c6f003eff44ad6986624105a" -dependencies = [ - "bitflags", - "chrono", - "comfy-table", - "csv", - "flatbuffers", - "half", - "hex", - "indexmap", - "lazy_static", - "lexical-core", - "multiversion", - "num", - "pyo3", - "rand 0.8.5", - "regex", - "serde", - "serde_derive", - "serde_json", -] - -[[package]] -name = "async-trait" -version = "0.1.53" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "base64" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" - -[[package]] -name = "bitflags" 
-version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "blake2" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9cf849ee05b2ee5fba5e36f97ff8ec2533916700fc0758d40d92136a42f3388" -dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a08e53fc5a564bb15bfe6fae56bd71522205f1f91893f9c0116edad6496c183f" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - -[[package]] -name = "block-buffer" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" -dependencies = [ - "generic-array", -] - -[[package]] -name = "brotli" -version = "3.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "cc" -version = "1.0.73" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" -dependencies = [ - "jobserver", -] - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "chrono" -version = "0.4.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" -dependencies = [ - "libc", - "num-integer", - "num-traits", - "winapi", -] - -[[package]] -name = "comfy-table" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b103d85ca6e209388771bfb7aa6b68a7aeec4afbf6f0a0264bfbf50360e5212e" -dependencies = [ - "strum", - "strum_macros", - "unicode-width", -] - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "cpufeatures" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.3.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crypto-common" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57952ca27b5e3606ff4dd79b0020231aaf9d6aa76dc05fd30137538c50bd3ce8" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa 0.4.8", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - -[[package]] -name = "datafusion" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8174c458d0266ba442038160fad2c98f02924e6179d6d46175f600b69abb5bb7" -dependencies = [ - "ahash", - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-data-access", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-row", - "futures", - "hashbrown 0.12.1", - "lazy_static", - "log", - "num_cpus", - "ordered-float 3.0.0", - "parking_lot", - "parquet", - "paste", - "pin-project-lite", - "pyo3", - "rand 0.8.5", - "smallvec", - "sqlparser", - "tempfile", - "tokio", - "tokio-stream", - "uuid 1.0.0", -] - -[[package]] -name = "datafusion-common" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c687e12a58d1499b19ee899fa467d122c8cc9223570af6f3eb3c4d9d9be929b" -dependencies = [ - "arrow", - "ordered-float 3.0.0", - "parquet", - "pyo3", - "sqlparser", -] - -[[package]] -name = "datafusion-data-access" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6cca9e1200ddd362d97f22d185e98995ebd3737d2b3d69a200a06a5099fa699a" -dependencies = [ - "async-trait", - "chrono", - "futures", - "glob", - "parking_lot", - "tempfile", - "tokio", -] - -[[package]] -name = "datafusion-expr" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d20909d70931b88605b6f121c0ed820cec1d5b802cb51b7b5759f0421be3add8" -dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "sqlparser", -] - -[[package]] -name = "datafusion-physical-expr" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad9083cb20b57216430d5b8e52341782d9520ce77e65c60803e330fd09bdfa6e" -dependencies = [ - "ahash", - "arrow", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-row", - "hashbrown 0.12.1", - "lazy_static", - "md-5", - "ordered-float 3.0.0", - "paste", - "rand 0.8.5", - "regex", - "sha2", - "unicode-segmentation", -] - -[[package]] -name = "datafusion-python" -version = "0.6.0" -dependencies = [ - "datafusion", - "datafusion-common", - "datafusion-expr", - "mimalloc", - "pyo3", - "rand 0.7.3", - "tokio", - "uuid 0.8.2", -] - -[[package]] -name = "datafusion-row" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f649a2021eefef44e0bef6782d053148b2fef74db7592fecfe87e042d52947a" -dependencies = [ - "arrow", - 
"datafusion-common", - "paste", - "rand 0.8.5", -] - -[[package]] -name = "digest" -version = "0.10.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "fastrand" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" -dependencies = [ - "instant", -] - -[[package]] -name = "flatbuffers" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a" -dependencies = [ - "bitflags", - "smallvec", - "thiserror", -] - -[[package]] -name = "flate2" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" -dependencies = [ - "cfg-if", - "crc32fast", - "libc", - "miniz_oxide", -] - -[[package]] -name = "futures" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" - -[[package]] -name = "futures-executor" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" - -[[package]] -name = "futures-macro" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" - -[[package]] -name = "futures-task" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" - -[[package]] -name = "futures-util" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - 
-[[package]] -name = "generic-array" -version = "0.14.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getrandom" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.9.0+wasi-snapshot-preview1", -] - -[[package]] -name = "getrandom" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad" -dependencies = [ - "cfg-if", - "libc", - "wasi 0.10.2+wasi-snapshot-preview1", -] - -[[package]] -name = "glob" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" - -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hashbrown" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" - -[[package]] -name = "hashbrown" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db0d4cf898abf0081f964436dc980e96670a0f36863e4b83aaacdb65c9d7ccc3" -dependencies = [ - "ahash", -] - -[[package]] -name = "heck" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" -dependencies = [ - "unicode-segmentation", -] - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "indexmap" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f647032dfaa1f8b6dc29bd3edb7bbef4861b8b8007ebb118d6db284fd59f6ee" -dependencies = [ - "autocfg", - "hashbrown 0.11.2", -] - -[[package]] -name = "indoc" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05a0bd019339e5d968b37855180087b7b9d512c5046fbd244cf8c95687927d6e" - -[[package]] -name = "instant" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "integer-encoding" -version = "1.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48dc51180a9b377fd75814d0cc02199c20f8e99433d6762f650d39cdbbd3b56f" - -[[package]] -name = "itoa" -version = "0.4.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" - -[[package]] -name = "itoa" -version = "1.0.2" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" - -[[package]] -name = "jobserver" -version = "0.1.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" -dependencies = [ - "libc", -] - -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - -[[package]] -name = "lexical-core" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92912c4af2e7d9075be3e5e3122c4d7263855fa6cce34fbece4dd08e5884624d" -dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", -] - -[[package]] -name = "lexical-parse-float" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f518eed87c3be6debe6d26b855c97358d8a11bf05acec137e5f53080f5ad2dd8" -dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-parse-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "afc852ec67c6538bbb2b9911116a385b24510e879a69ab516e6a151b15a79168" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "lexical-util" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c72a9d52c5c4e62fa2cdc2cb6c694a39ae1382d9c2a17a466f18e272a0930eb1" -dependencies = [ - "static_assertions", -] - -[[package]] -name = "lexical-write-float" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a89ec1d062e481210c309b672f73a0567b7855f21e7d2fae636df44d12e97f9" -dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", -] - -[[package]] -name = "lexical-write-integer" -version = "0.8.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "094060bd2a7c2ff3a16d5304a6ae82727cb3cc9d1c70f813cc73f744c319337e" -dependencies = [ - "lexical-util", - "static_assertions", -] - -[[package]] -name = "libc" -version = "0.2.126" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" - -[[package]] -name = "libmimalloc-sys" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11ca136052550448f55df7898c6dbe651c6b574fe38a0d9ea687a9f8088a2e2c" -dependencies = [ - "cc", -] - -[[package]] -name = "lock_api" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "lz4" -version = "1.23.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" -dependencies = [ - "libc", - 
"lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" -dependencies = [ - "cc", - "libc", -] - -[[package]] -name = "md-5" -version = "0.10.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" -dependencies = [ - "digest", -] - -[[package]] -name = "memchr" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" - -[[package]] -name = "mimalloc" -version = "0.1.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f64ad83c969af2e732e907564deb0d0ed393cec4af80776f77dd77a1a427698" -dependencies = [ - "libmimalloc-sys", -] - -[[package]] -name = "miniz_oxide" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" -dependencies = [ - "adler", -] - -[[package]] -name = "multiversion" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "025c962a3dd3cc5e0e520aa9c612201d127dcdf28616974961a649dca64f5373" -dependencies = [ - "multiversion-macros", -] - -[[package]] -name = "multiversion-macros" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a3e2bde382ebf960c1f3e79689fa5941625fe9bf694a1cb64af3e85faff3af" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "num" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97fbc387afefefd5e9e39493299f3069e14a140dd34dc19b4c1c1a8fddb6a790" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41702bd167c2df5520b384281bc111a4b5efcf7fbc4c9c222c815b07e0a6a6a" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" -dependencies = [ - "autocfg", -] - -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "once_cell" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9" - -[[package]] -name = "ordered-float" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3305af35278dd29f46fcdd139e0b1fbfae2153f0e5928b39b035542dd31e37b7" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ordered-float" -version = "3.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bcbab4bfea7a59c2c0fe47211a1ac4e3e96bea6eb446d704f310bc5c732ae2" -dependencies = [ - "num-traits", -] - -[[package]] -name = "parking_lot" -version = "0.12.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-sys", -] - -[[package]] -name = "parquet" -version = "13.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c6d737baed48775e87a69aa262f1fa2f1d6bd074dedbe9cac244b9aabf2a0b4" -dependencies = [ - "arrow", - "base64", - "brotli", - "byteorder", - "chrono", - "flate2", - "lz4", - "num", - "num-bigint", - "parquet-format", - "rand 0.8.5", - "snap", - "thrift", - "zstd", -] - -[[package]] -name = "parquet-format" -version = "4.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f0c06cdcd5460967c485f9c40a821746f5955ad81990533c7fae95dbd9bc0b5" -dependencies = [ - "thrift", -] - -[[package]] -name = "paste" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c520e05135d6e763148b6426a837e239041653ba7becd2e538c076c738025fc" - -[[package]] -name = "pin-project-lite" -version = "0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "ppv-lite86" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" - -[[package]] -name = "proc-macro2" -version = "1.0.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"1e6302e85060011447471887705bb7838f14aba43fcb06957d823739a496b3dc" -dependencies = [ - "cfg-if", - "indoc", - "libc", - "parking_lot", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", - "unindent", -] - -[[package]] -name = "pyo3-build-config" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b65b546c35d8a3b1b2f0ddbac7c6a569d759f357f2b9df884f5d6b719152c8" -dependencies = [ - "once_cell", - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c275a07127c1aca33031a563e384ffdd485aee34ef131116fcd58e3430d1742b" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "284fc4485bfbcc9850a6d661d627783f18d19c2ab55880b021671c4ba83e90f7" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.16.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53bda0f58f73f5c5429693c96ed57f7abdb38fdfc28ae06da4101a257adb7faf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" -dependencies = [ - "getrandom 0.1.16", - "libc", - "rand_chacha 0.2.2", - "rand_core 0.5.1", - "rand_hc", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha 0.3.1", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_chacha" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" -dependencies = [ - "ppv-lite86", - "rand_core 0.5.1", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core 0.6.3", -] - -[[package]] -name = "rand_core" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" -dependencies = [ - "getrandom 0.1.16", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom 0.2.6", -] - -[[package]] -name = "rand_hc" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" -dependencies = [ - "rand_core 0.5.1", -] - -[[package]] -name = "redox_syscall" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - -[[package]] -name = "rustversion" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" - -[[package]] -name = "ryu" -version = "1.0.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" - -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "serde" -version = "1.0.137" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61ea8d54c77f8315140a05f4c7237403bf38b72704d031543aa1d16abbf517d1" - -[[package]] -name = "serde_derive" -version = "1.0.137" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f26faba0c3959972377d3b2d306ee9f71faee9714294e41bb777f83f88578be" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.81" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b7ce2b32a1aed03c558dc61a5cd328f15aff2dbc17daad8fb8af04d2100e15c" -dependencies = [ - "indexmap", - "itoa 1.0.2", - "ryu", - "serde", -] - -[[package]] -name = "sha2" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "slab" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" - -[[package]] -name = "smallvec" -version = "1.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" - -[[package]] -name = "snap" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" - -[[package]] -name = "sqlparser" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddc2739f3a9bfc68e2f7b7695589f6cb0181c88af73ceaee0c84215cd2a2ae28" -dependencies = [ - "log", -] - 
-[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "strum" -version = "0.23.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" - -[[package]] -name = "strum_macros" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "subtle" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" - -[[package]] -name = "syn" -version = "1.0.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7fa7e55043acb85fca6b3c01485a2eeb6b69c5d21002e273c79e465f43b7ac1" - -[[package]] -name = "tempfile" -version = "3.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" -dependencies = [ - "cfg-if", - "fastrand", - "libc", - "redox_syscall", - "remove_dir_all", - "winapi", -] - -[[package]] -name = "thiserror" -version = "1.0.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "threadpool" -version = "1.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa" -dependencies = [ - "num_cpus", -] - -[[package]] -name = "thrift" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6d965454947cc7266d22716ebfd07b18d84ebaf35eec558586bbb2a8cb6b5b" -dependencies = [ - "byteorder", - "integer-encoding", - "log", - "ordered-float 1.1.1", - "threadpool", -] - -[[package]] -name = "tokio" -version = "1.18.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4903bf0427cf68dddd5aa6a93220756f8be0c34fcfa9f5e6191e103e15a31395" -dependencies = [ - "num_cpus", - "once_cell", - "parking_lot", - "pin-project-lite", - "tokio-macros", -] - -[[package]] -name = "tokio-macros" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b557f72f448c511a979e2564e55d74e6c4432fc96ff4f6241bc6bded342643b7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio-stream" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"50145484efff8818b5ccd256697f36863f587da82cf8b409c53adf1e840798e3" -dependencies = [ - "futures-core", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "typenum" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" - -[[package]] -name = "unicode-ident" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" - -[[package]] -name = "unicode-segmentation" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e8820f5d777f6224dc4be3632222971ac30164d4a258d595640799554ebfd99" - -[[package]] -name = "unicode-width" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" - -[[package]] -name = "unindent" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52fee519a3e570f7df377a06a1a7775cdbfb7aa460be7e08de2b1f0e69973a44" - -[[package]] -name = "uuid" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" -dependencies = [ - "getrandom 0.2.6", -] - -[[package]] -name = "uuid" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cfcd319456c4d6ea10087ed423473267e1a071f3bc0aa89f80d60997843c6f0" -dependencies = [ - "getrandom 0.2.6", -] - -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - -[[package]] -name = "wasi" -version = "0.9.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" - -[[package]] -name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" - -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.1+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" -dependencies = [ - "cc", - "libc", -] diff --git a/ballista-python/Cargo.toml b/ballista-python/Cargo.toml deleted file mode 100644 index 0c3a42c3d..000000000 --- a/ballista-python/Cargo.toml +++ /dev/null @@ -1,51 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -[package] -name = "ballista-python" -version = "0.6.0" -homepage = "https://github.com/apache/arrow" -repository = "https://github.com/apache/arrow" -authors = ["Apache Arrow "] -description = "Build and run queries against data" -readme = "README.md" -license = "Apache-2.0" -edition = "2021" -rust-version = "1.57" - -[package.metadata.maturin] -name = "ballista._internal" - -[dependencies] -ballista = { git = "https://github.com/apache/arrow-ballista", rev = "9e78b8ecd55cc3e9d46387c87098c4e6625294eb" } -datafusion = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710", features = ["pyarrow"] } -datafusion-common = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } -datafusion-expr = { git = "https://github.com/apache/arrow-datafusion", rev = "585bc3a629b92ea7a86ebfe8bf762dbef4155710" } - -mimalloc = { version = "*", default-features = false } -pyo3 = { version = "~0.16.5", features = ["extension-module", "abi3", "abi3-py37"] } -rand = "0.7" -tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] } -uuid = { version = "0.8", features = ["v4"] } - -[lib] -crate-type = ["cdylib", "rlib"] -name = "ballista_python" - -[profile.release] -codegen-units = 1 -lto = true diff --git a/ballista-python/LICENSE.txt b/ballista-python/LICENSE.txt deleted file mode 100644 index d64569567..000000000 --- a/ballista-python/LICENSE.txt +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. 
For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/ballista-python/README.md b/ballista-python/README.md deleted file mode 100644 index a9d7b873e..000000000 --- a/ballista-python/README.md +++ /dev/null @@ -1,105 +0,0 @@ - - -# Ballista in Python - -[![Python test](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml/badge.svg)](https://github.com/datafusion-contrib/datafusion-python/actions/workflows/test.yaml) - -This is a Python library that binds to [Apache Arrow](https://arrow.apache.org/) in-memory query engine [DataFusion](https://github.com/apache/arrow-datafusion). - -Like pyspark, it allows you to build a plan through SQL or a DataFrame API against in-memory data, parquet or CSV files, run it in a multi-threaded environment, and obtain the result back in Python. 
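
The quick-start further down in this README only exercises the SQL route. As a complement, here is a minimal DataFrame-style sketch of the same idea; it assumes the Ballista `DataFrame` and `Expression` mirror the DataFusion methods exercised in the test suite below (`select`, `filter`, `collect`, comparison operators), and the host, path, and column names `a`/`b` are hypothetical placeholders:

```python
import ballista
from ballista import column, literal

# Requires a reachable Ballista scheduler; the host and data path are placeholders.
ctx = ballista.BallistaContext(host="localhost")
ctx.register_parquet("table", "/data")

# Start from SQL, then refine the plan with DataFrame-style calls
# (assumed to behave like the DataFusion DataFrame used in the tests below).
df = (
    ctx.sql("SELECT a, b FROM table")
    .filter(column("a") > literal(1))
    .select(column("a") + column("b"))
)
batches = df.collect()  # list of pyarrow.RecordBatch
```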
- -The major advantage of this library over other execution engines is that this library achieves zero-copy between Python and its execution engine: there is no cost in using UDFs, UDAFs, and collecting the results to Python apart from having to lock the GIL when running those operations. - -Its query engine, DataFusion, is written in [Rust](https://www.rust-lang.org/), which makes strong assumptions about thread safety and lack of memory leaks. - -Technically, zero-copy is achieved via the [c data interface](https://arrow.apache.org/docs/format/CDataInterface.html). - -## How to use it - -Simple usage: - -```python -import ballista -ctx = ballista.BallistaContext(host="host.docker.internal") -ctx.register_parquet("table", "/data") -df = ctx.sql("SELECT * FROM table LIMIT 10") -df.show() -``` - -## How to install (from pip) - -```bash -pip install ballista -# or -python -m pip install ballista -``` - -You can verify the installation by running: - -```python ->>> import ballista ->>> ballista.__version__ -'0.6.0' -``` - -## How to develop - -This assumes that you have rust and cargo installed. We use the workflow recommended by [pyo3](https://github.com/PyO3/pyo3) and [maturin](https://github.com/PyO3/maturin). - -Bootstrap: - -```bash -# fetch this repo -git clone git@github.com:datafusion-contrib/datafusion-python.git -# prepare development environment (used to build wheel / install in development) -python3 -m venv venv -# activate the venv -source venv/bin/activate -# update pip itself if necessary -python -m pip install -U pip -# install dependencies (for Python 3.8+) -python -m pip install -r requirements-310.txt -``` - -Whenever rust code changes (your changes or via `git pull`): - -```bash -# make sure you activate the venv using "source venv/bin/activate" first -maturin develop -python -m pytest -``` - -## How to update dependencies - -To change test dependencies, change the `requirements.in` and run - -```bash -# install pip-tools (this can be done only once), also consider running in venv -python -m pip install pip-tools -python -m piptools compile --generate-hashes -o requirements-310.txt -``` - -To update dependencies, run with `-U` - -```bash -python -m piptools compile -U --generate-hashes -o requirements-310.txt -``` - -More details [here](https://github.com/jazzband/pip-tools) diff --git a/ballista-python/ballista/__init__.py b/ballista-python/ballista/__init__.py deleted file mode 100644 index 96108f9c6..000000000 --- a/ballista-python/ballista/__init__.py +++ /dev/null @@ -1,120 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from abc import ABCMeta, abstractmethod -from typing import List - -try: - import importlib.metadata as importlib_metadata -except ImportError: - import importlib_metadata - - -import datafusion - -import pyarrow as pa - -from ._internal import ( - DataFrame, - BallistaContext, - Expression, -) - - -__version__ = importlib_metadata.version(__name__) - - -__all__ = [ - "DataFrame", - "BallistaContext", - "Expression", - "AggregateUDF", - "ScalarUDF", - "column", - "literal", -] - - -class Accumulator(metaclass=ABCMeta): - @abstractmethod - def state(self) -> List[pa.Scalar]: - pass - - @abstractmethod - def update(self, values: pa.Array) -> None: - pass - - @abstractmethod - def merge(self, states: pa.Array) -> None: - pass - - @abstractmethod - def evaluate(self) -> pa.Scalar: - pass - - -def column(value): - return Expression.column(value) - - -col = column - - -def literal(value): - if not isinstance(value, pa.Scalar): - value = pa.scalar(value) - return Expression.literal(value) - - -lit = literal - - -def udf(func, input_types, return_type, volatility, name=None): - """ - Create a new User Defined Function - """ - if not callable(func): - raise TypeError("`func` argument must be callable") - if name is None: - name = func.__qualname__ - return datafusion.ScalarUDF( - name=name, - func=func, - input_types=input_types, - return_type=return_type, - volatility=volatility, - ) - - -def udaf(accum, input_type, return_type, state_type, volatility, name=None): - """ - Create a new User Defined Aggregate Function - """ - if not issubclass(accum, Accumulator): - raise TypeError( - "`accum` must implement the abstract base class Accumulator" - ) - if name is None: - name = accum.__qualname__ - return datafusion.AggregateUDF( - name=name, - accumulator=accum, - input_type=input_type, - return_type=return_type, - state_type=state_type, - volatility=volatility, - ) diff --git a/ballista-python/ballista/functions.py b/ballista-python/ballista/functions.py deleted file mode 100644 index 782ecba22..000000000 --- a/ballista-python/ballista/functions.py +++ /dev/null @@ -1,23 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - - -from ._internal import functions - - -def __getattr__(name): - return getattr(functions, name) diff --git a/ballista-python/ballista/tests/__init__.py b/ballista-python/ballista/tests/__init__.py deleted file mode 100644 index 13a83393a..000000000 --- a/ballista-python/ballista/tests/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. diff --git a/ballista-python/ballista/tests/conftest.py b/ballista-python/ballista/tests/conftest.py deleted file mode 100644 index 93662a082..000000000 --- a/ballista-python/ballista/tests/conftest.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest -from datafusion import SessionContext -import pyarrow as pa - - -@pytest.fixture -def ctx(): - return SessionContext() - - -@pytest.fixture -def database(ctx, tmp_path): - path = tmp_path / "test.csv" - - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - pa.csv.write_csv(table, path) - - ctx.register_csv("csv", path) - ctx.register_csv("csv1", str(path)) - ctx.register_csv( - "csv2", - path, - has_header=True, - delimiter=",", - schema_infer_max_records=10, - ) diff --git a/ballista-python/ballista/tests/generic.py b/ballista-python/ballista/tests/generic.py deleted file mode 100644 index 1f984a40a..000000000 --- a/ballista-python/ballista/tests/generic.py +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
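
The `ballista/functions.py` module shown above uses a module-level `__getattr__` (PEP 562) to forward every attribute lookup to the compiled `_internal.functions` module. A self-contained sketch of the same pattern, with a hypothetical `_backend` namespace standing in for the native module, behaves like this:

```python
# functions_demo.py - sketch of the PEP 562 forwarding used in ballista/functions.py.
# `_backend` is a hypothetical stand-in for the compiled `_internal.functions` module.
import math
import types

_backend = types.SimpleNamespace(abs=abs, sin=math.sin)


def __getattr__(name):
    # Called only for names not found in this module's globals.
    # A missing name raises AttributeError, which `from functions_demo import foo`
    # surfaces as ImportError - the behaviour asserted in test_imports.py below.
    return getattr(_backend, name)
```

Importing this module and calling `functions_demo.sin(0.0)` resolves through `__getattr__`, while an unknown name such as `functions_demo.foobar` raises.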
- -import datetime - -import numpy as np -import pyarrow as pa -import pyarrow.csv - -# used to write parquet files -import pyarrow.parquet as pq - - -def data(): - np.random.seed(1) - data = np.concatenate( - [ - np.random.normal(0, 0.01, size=50), - np.random.normal(50, 0.01, size=50), - ] - ) - return pa.array(data) - - -def data_with_nans(): - np.random.seed(0) - data = np.random.normal(0, 0.01, size=50) - mask = np.random.randint(0, 2, size=50) - data[mask == 0] = np.NaN - return data - - -def data_datetime(f): - data = [ - datetime.datetime.now(), - datetime.datetime.now() - datetime.timedelta(days=1), - datetime.datetime.now() + datetime.timedelta(days=1), - ] - return pa.array( - data, type=pa.timestamp(f), mask=np.array([False, True, False]) - ) - - -def data_date32(): - data = [ - datetime.date(2000, 1, 1), - datetime.date(1980, 1, 1), - datetime.date(2030, 1, 1), - ] - return pa.array( - data, type=pa.date32(), mask=np.array([False, True, False]) - ) - - -def data_timedelta(f): - data = [ - datetime.timedelta(days=100), - datetime.timedelta(days=1), - datetime.timedelta(seconds=1), - ] - return pa.array( - data, type=pa.duration(f), mask=np.array([False, True, False]) - ) - - -def data_binary_other(): - return np.array([1, 0, 0], dtype="u4") - - -def write_parquet(path, data): - table = pa.Table.from_arrays([data], names=["a"]) - pq.write_table(table, path) - return str(path) diff --git a/ballista-python/ballista/tests/test_aggregation.py b/ballista-python/ballista/tests/test_aggregation.py deleted file mode 100644 index bba1ed41d..000000000 --- a/ballista-python/ballista/tests/test_aggregation.py +++ /dev/null @@ -1,48 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
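
The helpers in `generic.py` above lean on pyarrow's `mask` argument, where `True` marks a slot as null (which is why the tests later observe `None` in those positions). A minimal sketch of the same pattern:

```python
import numpy as np
import pyarrow as pa

# True in the mask marks the slot as null, exactly as data_date32() above does.
arr = pa.array([1, 2, 3], type=pa.int64(), mask=np.array([False, True, False]))
assert arr.to_pylist() == [1, None, 3]
```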
- -import pyarrow as pa -import pytest - -from datafusion import SessionContext, column -from datafusion import functions as f - - -@pytest.fixture -def df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 4, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) - - -def test_built_in_aggregation(df): - col_a = column("a") - col_b = column("b") - df = df.aggregate( - [], - [f.max(col_a), f.min(col_a), f.count(col_a), f.approx_distinct(col_b)], - ) - result = df.collect()[0] - assert result.column(0) == pa.array([3]) - assert result.column(1) == pa.array([1]) - assert result.column(2) == pa.array([3], type=pa.uint64()) - assert result.column(3) == pa.array([2], type=pa.uint64()) diff --git a/ballista-python/ballista/tests/test_catalog.py b/ballista-python/ballista/tests/test_catalog.py deleted file mode 100644 index a9bdf72b3..000000000 --- a/ballista-python/ballista/tests/test_catalog.py +++ /dev/null @@ -1,40 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pyarrow as pa -import pytest - - -def test_basic(ctx, database): - with pytest.raises(KeyError): - ctx.catalog("non-existent") - - default = ctx.catalog() - assert default.names() == ["public"] - - for database in [default.database("public"), default.database()]: - assert database.names() == {"csv1", "csv", "csv2"} - - table = database.table("csv") - assert table.kind == "physical" - assert table.schema == pa.schema( - [ - pa.field("int", pa.int64(), nullable=False), - pa.field("str", pa.string(), nullable=False), - pa.field("float", pa.float64(), nullable=False), - ] - ) diff --git a/ballista-python/ballista/tests/test_context.py b/ballista-python/ballista/tests/test_context.py deleted file mode 100644 index 4d4a38c9d..000000000 --- a/ballista-python/ballista/tests/test_context.py +++ /dev/null @@ -1,74 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pyarrow as pa - - -def test_register_record_batches(ctx): - # create a RecordBatch and register it as memtable - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - ctx.register_record_batches("t", [[batch]]) - - assert ctx.tables() == {"t"} - - result = ctx.sql("SELECT a+b, a-b FROM t").collect() - - assert result[0].column(0) == pa.array([5, 7, 9]) - assert result[0].column(1) == pa.array([-3, -3, -3]) - - -def test_create_dataframe_registers_unique_table_name(ctx): - # create a RecordBatch and register it as memtable - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - df = ctx.create_dataframe([[batch]]) - tables = list(ctx.tables()) - - assert df - assert len(tables) == 1 - assert len(tables[0]) == 33 - assert tables[0].startswith("c") - # ensure that the rest of the table name contains - # only hexadecimal numbers - for c in tables[0][1:]: - assert c in "0123456789abcdef" - - -def test_register_table(ctx, database): - default = ctx.catalog() - public = default.database("public") - assert public.names() == {"csv", "csv1", "csv2"} - table = public.table("csv") - - ctx.register_table("csv3", table) - assert public.names() == {"csv", "csv1", "csv2", "csv3"} - - -def test_deregister_table(ctx, database): - default = ctx.catalog() - public = default.database("public") - assert public.names() == {"csv", "csv1", "csv2"} - - ctx.deregister_table("csv") - assert public.names() == {"csv1", "csv2"} diff --git a/ballista-python/ballista/tests/test_dataframe.py b/ballista-python/ballista/tests/test_dataframe.py deleted file mode 100644 index 43c260ae2..000000000 --- a/ballista-python/ballista/tests/test_dataframe.py +++ /dev/null @@ -1,199 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pyarrow as pa -import pytest - -from datafusion import functions as f -from datafusion import DataFrame, SessionContext, column, literal, udf - - -@pytest.fixture -def df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - return ctx.create_dataframe([[batch]]) - - -@pytest.fixture -def struct_df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([{"c": 1}, {"c": 2}, {"c": 3}]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - - return ctx.create_dataframe([[batch]]) - - -def test_select(df): - df = df.select( - column("a") + column("b"), - column("a") - column("b"), - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([5, 7, 9]) - assert result.column(1) == pa.array([-3, -3, -3]) - - -def test_select_columns(df): - df = df.select_columns("b", "a") - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([4, 5, 6]) - assert result.column(1) == pa.array([1, 2, 3]) - - -def test_filter(df): - df = df.select( - column("a") + column("b"), - column("a") - column("b"), - ).filter(column("a") > literal(2)) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([9]) - assert result.column(1) == pa.array([-3]) - - -def test_sort(df): - df = df.sort(column("b").sort(ascending=False)) - - table = pa.Table.from_batches(df.collect()) - expected = {"a": [3, 2, 1], "b": [6, 5, 4]} - - assert table.to_pydict() == expected - - -def test_limit(df): - df = df.limit(1) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert len(result.column(0)) == 1 - assert len(result.column(1)) == 1 - - -def test_udf(df): - # is_null is a pa function over arrays - is_null = udf( - lambda x: x.is_null(), - [pa.int64()], - pa.bool_(), - volatility="immutable", - ) - - df = df.select(is_null(column("a"))) - result = df.collect()[0].column(0) - - assert result == pa.array([False, False, False]) - - -def test_join(): - ctx = SessionContext() - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - df = ctx.create_dataframe([[batch]]) - - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2]), pa.array([8, 10])], - names=["a", "c"], - ) - df1 = ctx.create_dataframe([[batch]]) - - df = df.join(df1, join_keys=(["a"], ["a"]), how="inner") - df = df.sort(column("a").sort(ascending=True)) - table = pa.Table.from_batches(df.collect()) - - expected = {"a": [1, 2], "c": [8, 10], "b": [4, 5]} - assert table.to_pydict() == expected - - -def test_window_lead(df): - df = df.select( - column("a"), - f.alias( - f.window( - "lead", [column("b")], order_by=[f.order_by(column("b"))] - ), - "a_next", - ), - ) - - table = pa.Table.from_batches(df.collect()) - - expected = {"a": [1, 2, 3], "a_next": [5, 6, None]} - assert table.to_pydict() == expected - - -def test_get_dataframe(tmp_path): - ctx = SessionContext() - - path = tmp_path / "test.csv" - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - pa.csv.write_csv(table, path) - - ctx.register_csv("csv", path) - - df = ctx.table("csv") - assert isinstance(df, DataFrame) - - -def 
test_struct_select(struct_df): - df = struct_df.select( - column("a")["c"] + column("b"), - column("a")["c"] - column("b"), - ) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([5, 7, 9]) - assert result.column(1) == pa.array([-3, -3, -3]) - - -def test_explain(df): - df = df.select( - column("a") + column("b"), - column("a") - column("b"), - ) - df.explain() diff --git a/ballista-python/ballista/tests/test_functions.py b/ballista-python/ballista/tests/test_functions.py deleted file mode 100644 index daa2f1967..000000000 --- a/ballista-python/ballista/tests/test_functions.py +++ /dev/null @@ -1,219 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import numpy as np -import pyarrow as pa -import pytest - -from datafusion import SessionContext, column -from datafusion import functions as f -from datafusion import literal - - -@pytest.fixture -def df(): - ctx = SessionContext() - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array(["Hello", "World", "!"]), pa.array([4, 5, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) - - -def test_literal(df): - df = df.select( - literal(1), - literal("1"), - literal("OK"), - literal(3.14), - literal(True), - literal(b"hello world"), - ) - result = df.collect() - assert len(result) == 1 - result = result[0] - assert result.column(0) == pa.array([1] * 3) - assert result.column(1) == pa.array(["1"] * 3) - assert result.column(2) == pa.array(["OK"] * 3) - assert result.column(3) == pa.array([3.14] * 3) - assert result.column(4) == pa.array([True] * 3) - assert result.column(5) == pa.array([b"hello world"] * 3) - - -def test_lit_arith(df): - """ - Test literals with arithmetic operations - """ - df = df.select( - literal(1) + column("b"), f.concat(column("a"), literal("!")) - ) - result = df.collect() - assert len(result) == 1 - result = result[0] - assert result.column(0) == pa.array([5, 6, 7]) - assert result.column(1) == pa.array(["Hello!", "World!", "!!"]) - - -def test_math_functions(): - ctx = SessionContext() - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([0.1, -0.7, 0.55])], names=["value"] - ) - df = ctx.create_dataframe([[batch]]) - - values = np.array([0.1, -0.7, 0.55]) - col_v = column("value") - df = df.select( - f.abs(col_v), - f.sin(col_v), - f.cos(col_v), - f.tan(col_v), - f.asin(col_v), - f.acos(col_v), - f.exp(col_v), - f.ln(col_v + literal(pa.scalar(1))), - f.log2(col_v + literal(pa.scalar(1))), - f.log10(col_v + literal(pa.scalar(1))), - f.random(), - ) - batches = df.collect() - assert len(batches) == 1 - result = batches[0] - - np.testing.assert_array_almost_equal(result.column(0), np.abs(values)) - 
np.testing.assert_array_almost_equal(result.column(1), np.sin(values)) - np.testing.assert_array_almost_equal(result.column(2), np.cos(values)) - np.testing.assert_array_almost_equal(result.column(3), np.tan(values)) - np.testing.assert_array_almost_equal(result.column(4), np.arcsin(values)) - np.testing.assert_array_almost_equal(result.column(5), np.arccos(values)) - np.testing.assert_array_almost_equal(result.column(6), np.exp(values)) - np.testing.assert_array_almost_equal( - result.column(7), np.log(values + 1.0) - ) - np.testing.assert_array_almost_equal( - result.column(8), np.log2(values + 1.0) - ) - np.testing.assert_array_almost_equal( - result.column(9), np.log10(values + 1.0) - ) - np.testing.assert_array_less(result.column(10), np.ones_like(values)) - - -def test_string_functions(df): - df = df.select(f.md5(column("a")), f.lower(column("a"))) - result = df.collect() - assert len(result) == 1 - result = result[0] - assert result.column(0) == pa.array( - [ - "8b1a9953c4611296a827abf8c47804d7", - "f5a7924e621e84c9280a9a27e1bcb7f6", - "9033e0e305f247c0c3c80d0c7848c8b3", - ] - ) - assert result.column(1) == pa.array(["hello", "world", "!"]) - - -def test_hash_functions(df): - exprs = [ - f.digest(column("a"), literal(m)) - for m in ("md5", "sha256", "sha512", "blake2s", "blake3") - ] - df = df.select(*exprs) - result = df.collect() - assert len(result) == 1 - result = result[0] - b = bytearray.fromhex - assert result.column(0) == pa.array( - [ - b("8B1A9953C4611296A827ABF8C47804D7"), - b("F5A7924E621E84C9280A9A27E1BCB7F6"), - b("9033E0E305F247C0C3C80D0C7848C8B3"), - ] - ) - assert result.column(1) == pa.array( - [ - b( - "185F8DB32271FE25F561A6FC938B2E26" - "4306EC304EDA518007D1764826381969" - ), - b( - "78AE647DC5544D227130A0682A51E30B" - "C7777FBB6D8A8F17007463A3ECD1D524" - ), - b( - "BB7208BC9B5D7C04F1236A82A0093A5E" - "33F40423D5BA8D4266F7092C3BA43B62" - ), - ] - ) - assert result.column(2) == pa.array( - [ - b( - "3615F80C9D293ED7402687F94B22D58E" - "529B8CC7916F8FAC7FDDF7FBD5AF4CF7" - "77D3D795A7A00A16BF7E7F3FB9561EE9" - "BAAE480DA9FE7A18769E71886B03F315" - ), - b( - "8EA77393A42AB8FA92500FB077A9509C" - "C32BC95E72712EFA116EDAF2EDFAE34F" - "BB682EFDD6C5DD13C117E08BD4AAEF71" - "291D8AACE2F890273081D0677C16DF0F" - ), - b( - "3831A6A6155E509DEE59A7F451EB3532" - "4D8F8F2DF6E3708894740F98FDEE2388" - "9F4DE5ADB0C5010DFB555CDA77C8AB5D" - "C902094C52DE3278F35A75EBC25F093A" - ), - ] - ) - assert result.column(3) == pa.array( - [ - b( - "F73A5FBF881F89B814871F46E26AD3FA" - "37CB2921C5E8561618639015B3CCBB71" - ), - b( - "B792A0383FB9E7A189EC150686579532" - "854E44B71AC394831DAED169BA85CCC5" - ), - b( - "27988A0E51812297C77A433F63523334" - "6AEE29A829DCF4F46E0F58F402C6CFCB" - ), - ] - ) - assert result.column(4) == pa.array( - [ - b( - "FBC2B0516EE8744D293B980779178A35" - "08850FDCFE965985782C39601B65794F" - ), - b( - "BF73D18575A736E4037D45F9E316085B" - "86C19BE6363DE6AA789E13DEAACC1C4E" - ), - b( - "C8D11B9F7237E4034ADBCD2005735F9B" - "C4C597C75AD89F4492BEC8F77D15F7EB" - ), - ] - ) diff --git a/ballista-python/ballista/tests/test_imports.py b/ballista-python/ballista/tests/test_imports.py deleted file mode 100644 index 9522bc31a..000000000 --- a/ballista-python/ballista/tests/test_imports.py +++ /dev/null @@ -1,69 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import pytest - -import ballista -from ballista import ( - AggregateUDF, - DataFrame, - SessionContext, - Expression, - ScalarUDF, - functions, -) - - -def test_import_ballista(): - assert ballista.__name__ == "ballista" - - -def test_ballista_python_version(): - assert ballista.__version__ is not None - - -def test_class_module_is_ballista(): - for klass in [ - SessionContext, - Expression, - DataFrame, - ScalarUDF, - AggregateUDF, - ]: - assert klass.__module__ == "ballista" - - -def test_import_from_functions_submodule(): - from ballista.functions import abs, sin # noqa - - assert functions.abs is abs - assert functions.sin is sin - - msg = "cannot import name 'foobar' from 'ballista.functions'" - with pytest.raises(ImportError, match=msg): - from ballista.functions import foobar # noqa - - -def test_classes_are_inheritable(): - class MyExecContext(SessionContext): - pass - - class MyExpression(Expression): - pass - - class MyDataFrame(DataFrame): - pass diff --git a/ballista-python/ballista/tests/test_indexing.py b/ballista-python/ballista/tests/test_indexing.py deleted file mode 100644 index 9e65c0efd..000000000 --- a/ballista-python/ballista/tests/test_indexing.py +++ /dev/null @@ -1,55 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -import pyarrow as pa -import pytest - -from datafusion import SessionContext - - -@pytest.fixture -def df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 4, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) - - -def test_indexing(df): - assert df["a"] is not None - assert df["a", "b"] is not None - assert df[("a", "b")] is not None - assert df[["a"]] is not None - - -def test_err(df): - with pytest.raises(Exception) as e_info: - df["c"] - - assert "Schema error: No field named 'c'" in e_info.value.args[0] - - with pytest.raises(Exception) as e_info: - df[1] - - assert ( - "DataFrame can only be indexed by string index or indices" - in e_info.value.args[0] - ) diff --git a/ballista-python/ballista/tests/test_sql.py b/ballista-python/ballista/tests/test_sql.py deleted file mode 100644 index cde5425a8..000000000 --- a/ballista-python/ballista/tests/test_sql.py +++ /dev/null @@ -1,245 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import numpy as np -import pyarrow as pa -import pytest - -from datafusion import udf - -from . 
import generic as helpers - - -def test_no_table(ctx): - with pytest.raises(Exception, match="DataFusion error"): - ctx.sql("SELECT a FROM b").collect() - - -def test_register_csv(ctx, tmp_path): - path = tmp_path / "test.csv" - - table = pa.Table.from_arrays( - [ - [1, 2, 3, 4], - ["a", "b", "c", "d"], - [1.1, 2.2, 3.3, 4.4], - ], - names=["int", "str", "float"], - ) - pa.csv.write_csv(table, path) - - ctx.register_csv("csv", path) - ctx.register_csv("csv1", str(path)) - ctx.register_csv( - "csv2", - path, - has_header=True, - delimiter=",", - schema_infer_max_records=10, - ) - alternative_schema = pa.schema( - [ - ("some_int", pa.int16()), - ("some_bytes", pa.string()), - ("some_floats", pa.float32()), - ] - ) - ctx.register_csv("csv3", path, schema=alternative_schema) - - assert ctx.tables() == {"csv", "csv1", "csv2", "csv3"} - - for table in ["csv", "csv1", "csv2"]: - result = ctx.sql(f"SELECT COUNT(int) AS cnt FROM {table}").collect() - result = pa.Table.from_batches(result) - assert result.to_pydict() == {"cnt": [4]} - - result = ctx.sql("SELECT * FROM csv3").collect() - result = pa.Table.from_batches(result) - assert result.schema == alternative_schema - - with pytest.raises( - ValueError, match="Delimiter must be a single character" - ): - ctx.register_csv("csv4", path, delimiter="wrong") - - -def test_register_parquet(ctx, tmp_path): - path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) - ctx.register_parquet("t", path) - assert ctx.tables() == {"t"} - - result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() - result = pa.Table.from_batches(result) - assert result.to_pydict() == {"cnt": [100]} - - -def test_execute(ctx, tmp_path): - data = [1, 1, 2, 2, 3, 11, 12] - - # single column, "a" - path = helpers.write_parquet(tmp_path / "a.parquet", pa.array(data)) - ctx.register_parquet("t", path) - - assert ctx.tables() == {"t"} - - # count - result = ctx.sql("SELECT COUNT(a) AS cnt FROM t").collect() - - expected = pa.array([7], pa.uint64()) - expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] - assert result == expected - - # where - expected = pa.array([2], pa.uint64()) - expected = [pa.RecordBatch.from_arrays([expected], ["cnt"])] - result = ctx.sql("SELECT COUNT(a) AS cnt FROM t WHERE a > 10").collect() - assert result == expected - - # group by - results = ctx.sql( - "SELECT CAST(a as int) AS a, COUNT(a) AS cnt FROM t GROUP BY a" - ).collect() - - # group by returns batches - result_keys = [] - result_values = [] - for result in results: - pydict = result.to_pydict() - result_keys.extend(pydict["a"]) - result_values.extend(pydict["cnt"]) - - result_keys, result_values = ( - list(t) for t in zip(*sorted(zip(result_keys, result_values))) - ) - - assert result_keys == [1, 2, 3, 11, 12] - assert result_values == [2, 2, 1, 1, 1] - - # order by - result = ctx.sql( - "SELECT a, CAST(a AS int) AS a_int FROM t ORDER BY a DESC LIMIT 2" - ).collect() - expected_a = pa.array([50.0219, 50.0152], pa.float64()) - expected_cast = pa.array([50, 50], pa.int32()) - expected = [ - pa.RecordBatch.from_arrays([expected_a, expected_cast], ["a", "a_int"]) - ] - np.testing.assert_equal(expected[0].column(1), expected[0].column(1)) - - -def test_cast(ctx, tmp_path): - """ - Verify that we can cast - """ - path = helpers.write_parquet(tmp_path / "a.parquet", helpers.data()) - ctx.register_parquet("t", path) - - valid_types = [ - "smallint", - "int", - "bigint", - "float(32)", - "float(64)", - "float", - ] - - select = ", ".join( - [f"CAST(9 AS {t}) AS A{i}" for i, t in 
enumerate(valid_types)] - ) - - # can execute, which implies that we can cast - ctx.sql(f"SELECT {select} FROM t").collect() - - -@pytest.mark.parametrize( - ("fn", "input_types", "output_type", "input_values", "expected_values"), - [ - ( - lambda x: x, - [pa.float64()], - pa.float64(), - [-1.2, None, 1.2], - [-1.2, None, 1.2], - ), - ( - lambda x: x.is_null(), - [pa.float64()], - pa.bool_(), - [-1.2, None, 1.2], - [False, True, False], - ), - ], -) -def test_udf( - ctx, tmp_path, fn, input_types, output_type, input_values, expected_values -): - # write to disk - path = helpers.write_parquet( - tmp_path / "a.parquet", pa.array(input_values) - ) - ctx.register_parquet("t", path) - - func = udf( - fn, input_types, output_type, name="func", volatility="immutable" - ) - ctx.register_udf(func) - - batches = ctx.sql("SELECT func(a) AS tt FROM t").collect() - result = batches[0].column(0) - - assert result == pa.array(expected_values) - - -_null_mask = np.array([False, True, False]) - - -@pytest.mark.parametrize( - "arr", - [ - pa.array(["a", "b", "c"], pa.utf8(), _null_mask), - pa.array(["a", "b", "c"], pa.large_utf8(), _null_mask), - pa.array([b"1", b"2", b"3"], pa.binary(), _null_mask), - pa.array([b"1111", b"2222", b"3333"], pa.large_binary(), _null_mask), - pa.array([False, True, True], None, _null_mask), - pa.array([0, 1, 2], None), - helpers.data_binary_other(), - helpers.data_date32(), - helpers.data_with_nans(), - # C data interface missing - pytest.param( - pa.array([b"1111", b"2222", b"3333"], pa.binary(4), _null_mask), - marks=pytest.mark.xfail, - ), - pytest.param(helpers.data_datetime("s"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("ms"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("us"), marks=pytest.mark.xfail), - pytest.param(helpers.data_datetime("ns"), marks=pytest.mark.xfail), - # Not writtable to parquet - pytest.param(helpers.data_timedelta("s"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("ms"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("us"), marks=pytest.mark.xfail), - pytest.param(helpers.data_timedelta("ns"), marks=pytest.mark.xfail), - ], -) -def test_simple_select(ctx, tmp_path, arr): - path = helpers.write_parquet(tmp_path / "a.parquet", arr) - ctx.register_parquet("t", path) - - batches = ctx.sql("SELECT a AS tt FROM t").collect() - result = batches[0].column(0) - - np.testing.assert_equal(result, arr) diff --git a/ballista-python/ballista/tests/test_udaf.py b/ballista-python/ballista/tests/test_udaf.py deleted file mode 100644 index c2b29d199..000000000 --- a/ballista-python/ballista/tests/test_udaf.py +++ /dev/null @@ -1,136 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
- -from typing import List - -import pyarrow as pa -import pyarrow.compute as pc -import pytest - -from datafusion import Accumulator, SessionContext, column, udaf - - -class Summarize(Accumulator): - """ - Interface of a user-defined accumulation. - """ - - def __init__(self): - self._sum = pa.scalar(0.0) - - def state(self) -> List[pa.Scalar]: - return [self._sum] - - def update(self, values: pa.Array) -> None: - # Not nice since pyarrow scalars can't be summed yet. - # This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pc.sum(values).as_py()) - - def merge(self, states: pa.Array) -> None: - # Not nice since pyarrow scalars can't be summed yet. - # This breaks on `None` - self._sum = pa.scalar(self._sum.as_py() + pc.sum(states).as_py()) - - def evaluate(self) -> pa.Scalar: - return self._sum - - -class NotSubclassOfAccumulator: - pass - - -class MissingMethods(Accumulator): - def __init__(self): - self._sum = pa.scalar(0) - - def state(self) -> List[pa.Scalar]: - return [self._sum] - - -@pytest.fixture -def df(): - ctx = SessionContext() - - # create a RecordBatch and a new DataFrame from it - batch = pa.RecordBatch.from_arrays( - [pa.array([1, 2, 3]), pa.array([4, 4, 6])], - names=["a", "b"], - ) - return ctx.create_dataframe([[batch]]) - - -@pytest.mark.skip(reason="df.collect() will hang, need more investigations") -def test_errors(df): - with pytest.raises(TypeError): - udaf( - NotSubclassOfAccumulator, - pa.float64(), - pa.float64(), - [pa.float64()], - volatility="immutable", - ) - - accum = udaf( - MissingMethods, - pa.int64(), - pa.int64(), - [pa.int64()], - volatility="immutable", - ) - df = df.aggregate([], [accum(column("a"))]) - - msg = ( - "Can't instantiate abstract class MissingMethods with abstract " - "methods evaluate, merge, update" - ) - with pytest.raises(Exception, match=msg): - df.collect() - - -def test_aggregate(df): - summarize = udaf( - Summarize, - pa.float64(), - pa.float64(), - [pa.float64()], - volatility="immutable", - ) - - df = df.aggregate([], [summarize(column("a"))]) - - # execute and collect the first (and only) batch - result = df.collect()[0] - - assert result.column(0) == pa.array([1.0 + 2.0 + 3.0]) - - -def test_group_by(df): - summarize = udaf( - Summarize, - pa.float64(), - pa.float64(), - [pa.float64()], - volatility="immutable", - ) - - df = df.aggregate([column("b")], [summarize(column("a"))]) - - batches = df.collect() - - arrays = [batch.column(1) for batch in batches] - joined = pa.concat_arrays(arrays) - assert joined == pa.array([1.0 + 2.0, 3.0]) diff --git a/ballista-python/dev/create_license.py b/ballista-python/dev/create_license.py deleted file mode 100644 index 2a67cb8fd..000000000 --- a/ballista-python/dev/create_license.py +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/python -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# - -# This file is a mirror of https://github.com/apache/arrow-datafusion/blob/master/dev/create_license.py - -import json -import subprocess - -subprocess.check_output(["cargo", "install", "cargo-license"]) -data = subprocess.check_output( - [ - "cargo", - "license", - "--avoid-build-deps", - "--avoid-dev-deps", - "--do-not-bundle", - "--json", - ] -) -data = json.loads(data) - -result = """ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -""" -result += "\n------------------\n\n" -result += "This software is built and contains the following software:\n\n" -result += "(automatically generated via [cargo-license](https://crates.io/crates/cargo-license))\n\n" -for item in data: - license = item["license"] - name = item["name"] - version = item["version"] - repository = item["repository"] - result += "------------------\n\n" - result += f"### {name} {version}\n* source: [{repository}]({repository})\n* license: {license}\n\n" - -with open("LICENSE.txt", "w") as f: - f.write(result) diff --git a/ballista-python/pyproject.toml b/ballista-python/pyproject.toml deleted file mode 100644 index 2d9d30e78..000000000 --- a/ballista-python/pyproject.toml +++ /dev/null @@ -1,58 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
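
The final loop in `create_license.py` above reads only four keys from each cargo-license JSON entry (`name`, `version`, `repository`, `license`). With hypothetical values, a small sketch using the same f-string template shows what one rendered dependency section looks like:

```python
# Hypothetical cargo-license entry; only the keys create_license.py actually reads.
item = {
    "name": "some-crate",
    "version": "1.2.3",
    "repository": "https://github.com/example/some-crate",
    "license": "Apache-2.0",
}

# Same template as the loop in create_license.py above.
entry = (
    f"### {item['name']} {item['version']}\n"
    f"* source: [{item['repository']}]({item['repository']})\n"
    f"* license: {item['license']}\n\n"
)
print(entry)
```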
- -[build-system] -requires = ["maturin>=0.11,<0.13"] -build-backend = "maturin" - -[project] -name = "ballista" -description = "Build and run queries against data" -readme = "README.md" -license = {file = "LICENSE.txt"} -requires-python = ">=3.6" -keywords = ["ballista", "datafusion", "dataframe", "rust", "query-engine"] -classifier = [ - "Development Status :: 2 - Pre-Alpha", - "Intended Audience :: Developers", - "License :: OSI Approved :: Apache Software License", - "License :: OSI Approved", - "Operating System :: MacOS", - "Operating System :: Microsoft :: Windows", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python", - "Programming Language :: Rust", -] -dependencies = [ - "datafusion>=0.5.2", - "pyarrow>=1", -] - -[project.urls] -documentation = "https://arrow.apache.org/ballista/python" -repository = "https://github.com/apache/arrow-ballista" - -[tool.isort] -profile = "black" - -[tool.maturin] -sdist-include = ["Cargo.lock"] diff --git a/ballista-python/requirements-310.txt b/ballista-python/requirements-310.txt deleted file mode 100644 index 30a2291c4..000000000 --- a/ballista-python/requirements-310.txt +++ /dev/null @@ -1,213 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.10 -# To update, run: -# -# pip-compile --generate-hashes -# -attrs==21.4.0 \ - --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ - --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd - # via pytest -black==22.3.0 \ - --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ - --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ - --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ - --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ - --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ - --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ - --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ - --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ - --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ - --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ - --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ - --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ - --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ - --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ - --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ - --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ - --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ - --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ - --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ - --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ - --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ - 
--hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ - --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d - # via -r requirements.in -click==8.1.3 \ - --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ - --hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 - # via black -flake8==4.0.1 \ - --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ - --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d - # via -r requirements.in -iniconfig==1.1.1 \ - --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ - --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 - # via pytest -isort==5.10.1 \ - --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ - --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 - # via -r requirements.in -maturin==0.12.16 \ - --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ - --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ - --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ - --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ - --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ - --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ - --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ - --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ - --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ - --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ - --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ - --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ - --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ - --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ - --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ - --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ - --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 - # via -r requirements.in -mccabe==0.6.1 \ - --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ - --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f - # via flake8 -mypy==0.950 \ - --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ - --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ - --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ - --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ - --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ - --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ - --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ - --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ - --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ - --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ - 
--hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ - --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ - --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ - --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ - --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ - --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ - --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ - --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ - --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ - --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ - --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ - --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ - --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b - # via -r requirements.in -mypy-extensions==0.4.3 \ - --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ - --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 - # via - # black - # mypy -numpy==1.22.3 \ - --hash=sha256:07a8c89a04997625236c5ecb7afe35a02af3896c8aa01890a849913a2309c676 \ - --hash=sha256:08d9b008d0156c70dc392bb3ab3abb6e7a711383c3247b410b39962263576cd4 \ - --hash=sha256:201b4d0552831f7250a08d3b38de0d989d6f6e4658b709a02a73c524ccc6ffce \ - --hash=sha256:2c10a93606e0b4b95c9b04b77dc349b398fdfbda382d2a39ba5a822f669a0123 \ - --hash=sha256:3ca688e1b9b95d80250bca34b11a05e389b1420d00e87a0d12dc45f131f704a1 \ - --hash=sha256:48a3aecd3b997bf452a2dedb11f4e79bc5bfd21a1d4cc760e703c31d57c84b3e \ - --hash=sha256:568dfd16224abddafb1cbcce2ff14f522abe037268514dd7e42c6776a1c3f8e5 \ - --hash=sha256:5bfb1bb598e8229c2d5d48db1860bcf4311337864ea3efdbe1171fb0c5da515d \ - --hash=sha256:639b54cdf6aa4f82fe37ebf70401bbb74b8508fddcf4797f9fe59615b8c5813a \ - --hash=sha256:8251ed96f38b47b4295b1ae51631de7ffa8260b5b087808ef09a39a9d66c97ab \ - --hash=sha256:92bfa69cfbdf7dfc3040978ad09a48091143cffb778ec3b03fa170c494118d75 \ - --hash=sha256:97098b95aa4e418529099c26558eeb8486e66bd1e53a6b606d684d0c3616b168 \ - --hash=sha256:a3bae1a2ed00e90b3ba5f7bd0a7c7999b55d609e0c54ceb2b076a25e345fa9f4 \ - --hash=sha256:c34ea7e9d13a70bf2ab64a2532fe149a9aced424cd05a2c4ba662fd989e3e45f \ - --hash=sha256:dbc7601a3b7472d559dc7b933b18b4b66f9aa7452c120e87dfb33d02008c8a18 \ - --hash=sha256:e7927a589df200c5e23c57970bafbd0cd322459aa7b1ff73b7c2e84d6e3eae62 \ - --hash=sha256:f8c1f39caad2c896bc0018f699882b345b2a63708008be29b1f355ebf6f933fe \ - --hash=sha256:f950f8845b480cffe522913d35567e29dd381b0dc7e4ce6a4a9f9156417d2430 \ - --hash=sha256:fade0d4f4d292b6f39951b6836d7a3c7ef5b2347f3c420cd9820a1d90d794802 \ - --hash=sha256:fdf3c08bce27132395d3c3ba1503cac12e17282358cb4bddc25cc46b0aca07aa - # via - # -r requirements.in - # pyarrow -packaging==21.3 \ - --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ - --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 - # via pytest -pathspec==0.9.0 \ - --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ - --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 - # via black -platformdirs==2.5.2 \ - --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ - 
--hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 - # via black -pluggy==1.0.0 \ - --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ - --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 - # via pytest -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via pytest -pyarrow==8.0.0 \ - --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ - --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ - --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ - --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ - --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ - --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ - --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ - --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ - --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ - --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ - --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ - --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ - --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ - --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ - --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ - --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ - --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ - --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ - --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ - --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ - --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ - --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ - --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ - --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ - --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ - --hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ - --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ - --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ - --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ - --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb - # via -r requirements.in -pycodestyle==2.8.0 \ - --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ - --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f - # via flake8 -pyflakes==2.4.0 \ - --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ - --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e - # via flake8 -pyparsing==3.0.9 \ - --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ - 
--hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc - # via packaging -pytest==7.1.2 \ - --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ - --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f - # via - # black - # maturin - # mypy - # pytest -typing-extensions==4.2.0 \ - --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ - --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 - # via mypy diff --git a/ballista-python/requirements-37.txt b/ballista-python/requirements-37.txt deleted file mode 100644 index b1776eed7..000000000 --- a/ballista-python/requirements-37.txt +++ /dev/null @@ -1,268 +0,0 @@ -# -# This file is autogenerated by pip-compile with python 3.7 -# To update, run: -# -# pip-compile --generate-hashes -# -attrs==21.4.0 \ - --hash=sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4 \ - --hash=sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd - # via pytest -black==22.3.0 \ - --hash=sha256:06f9d8846f2340dfac80ceb20200ea5d1b3f181dd0556b47af4e8e0b24fa0a6b \ - --hash=sha256:10dbe6e6d2988049b4655b2b739f98785a884d4d6b85bc35133a8fb9a2233176 \ - --hash=sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09 \ - --hash=sha256:30d78ba6bf080eeaf0b7b875d924b15cd46fec5fd044ddfbad38c8ea9171043a \ - --hash=sha256:328efc0cc70ccb23429d6be184a15ce613f676bdfc85e5fe8ea2a9354b4e9015 \ - --hash=sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79 \ - --hash=sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb \ - --hash=sha256:5891ef8abc06576985de8fa88e95ab70641de6c1fca97e2a15820a9b69e51b20 \ - --hash=sha256:637a4014c63fbf42a692d22b55d8ad6968a946b4a6ebc385c5505d9625b6a464 \ - --hash=sha256:67c8301ec94e3bcc8906740fe071391bce40a862b7be0b86fb5382beefecd968 \ - --hash=sha256:6d2fc92002d44746d3e7db7cf9313cf4452f43e9ea77a2c939defce3b10b5c82 \ - --hash=sha256:6ee227b696ca60dd1c507be80a6bc849a5a6ab57ac7352aad1ffec9e8b805f21 \ - --hash=sha256:863714200ada56cbc366dc9ae5291ceb936573155f8bf8e9de92aef51f3ad0f0 \ - --hash=sha256:9b542ced1ec0ceeff5b37d69838106a6348e60db7b8fdd245294dc1d26136265 \ - --hash=sha256:a6342964b43a99dbc72f72812bf88cad8f0217ae9acb47c0d4f141a6416d2d7b \ - --hash=sha256:ad4efa5fad66b903b4a5f96d91461d90b9507a812b3c5de657d544215bb7877a \ - --hash=sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72 \ - --hash=sha256:cc1e1de68c8e5444e8f94c3670bb48a2beef0e91dddfd4fcc29595ebd90bb9ce \ - --hash=sha256:cee3e11161dde1b2a33a904b850b0899e0424cc331b7295f2a9698e79f9a69a0 \ - --hash=sha256:e3556168e2e5c49629f7b0f377070240bd5511e45e25a4497bb0073d9dda776a \ - --hash=sha256:e8477ec6bbfe0312c128e74644ac8a02ca06bcdb8982d4ee06f209be28cdf163 \ - --hash=sha256:ee8f1f7228cce7dffc2b464f07ce769f478968bfb3dd1254a4c2eeed84928aad \ - --hash=sha256:fd57160949179ec517d32ac2ac898b5f20d68ed1a9c977346efbac9c2f1e779d - # via -r requirements.in -click==8.1.3 \ - --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ - 
--hash=sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48 - # via black -flake8==4.0.1 \ - --hash=sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d \ - --hash=sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d - # via -r requirements.in -importlib-metadata==4.2.0 ; python_version < "3.8" \ - --hash=sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b \ - --hash=sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31 - # via - # -r requirements.in - # click - # flake8 - # pluggy - # pytest -iniconfig==1.1.1 \ - --hash=sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3 \ - --hash=sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32 - # via pytest -isort==5.10.1 \ - --hash=sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7 \ - --hash=sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951 - # via -r requirements.in -maturin==0.12.16 \ - --hash=sha256:29a635699db1f4981b891a4ee51ddcae8c410136ed40103232aea3b5f62e8504 \ - --hash=sha256:4b5fe1de8b8e7ba5a9f52002b24a2d8148a23d1260c7bd59291c319ccc5b31f1 \ - --hash=sha256:54ecff17c64cf5c5dc59ff22745517ea56b791995e70008d1dcd1623ce609f78 \ - --hash=sha256:641c8ed8452cb8a288baf953be78d03e27e60189a64f00cc7bcc1731d158e8f6 \ - --hash=sha256:70a042197fdcb726c911146a1c875f65f596de122a01eeb58af10faf3bd3a2c5 \ - --hash=sha256:781abebb255061b5eda0413ecbac22b88a7ab50ecaee607fe5d8e3c55ab48e52 \ - --hash=sha256:83a8f9378c320e981412f8d367e181af22f145d489a7da0a0c3aea86cf23f048 \ - --hash=sha256:8aeb62a328bf4d9439758b59ccf5360a5f3127bbe58bedbcb6c64e888de3eb36 \ - --hash=sha256:8d11e801216f4c91b2ba9da4bad615ffc3638f80a7ba973245a0154dcfdbee64 \ - --hash=sha256:917f77382cdff55d2f290d0f58b7e6f4a7aaa74b58e2b61e4c67b37786d8a965 \ - --hash=sha256:97756ad5ff113478de237b029add91def0a40af0dc5e120c25e1595addd9c151 \ - --hash=sha256:9ce67a844d63d1ba8cdcf903ee2e6e0b21c0b0461b97c8737751d74002ded4c4 \ - --hash=sha256:a026515e39fd48ee5318de57ddc6841a5fbcd5169b3860fb9ac9ea9521cc6027 \ - --hash=sha256:bc4da52ef0c7e975396e7e6fb90da8858c518b4dccb810ceabec9db7ecedde57 \ - --hash=sha256:d63f60dd5dddb165f824b2d8e593dcb31300d832eb6cbc6288dd484e29dfbd89 \ - --hash=sha256:e5e4e3bfcf209ea1a6d20cade2de1ea716e17ea491a7a8b3fee0e45a10aa1e98 \ - --hash=sha256:e7e3fa53c5207c05d4148ecbc0ce7463b7757989dadebcd8ab3a61c67b874157 - # via -r requirements.in -mccabe==0.6.1 \ - --hash=sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42 \ - --hash=sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f - # via flake8 -mypy==0.950 \ - --hash=sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d \ - --hash=sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8 \ - --hash=sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de \ - --hash=sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038 \ - --hash=sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed \ - --hash=sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334 \ - --hash=sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff \ - --hash=sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2 \ - --hash=sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22 \ - --hash=sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2 \ - 
--hash=sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2 \ - --hash=sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605 \ - --hash=sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb \ - --hash=sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519 \ - --hash=sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0 \ - --hash=sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc \ - --hash=sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b \ - --hash=sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f \ - --hash=sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075 \ - --hash=sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef \ - --hash=sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb \ - --hash=sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a \ - --hash=sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b - # via -r requirements.in -mypy-extensions==0.4.3 \ - --hash=sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d \ - --hash=sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8 - # via - # black - # mypy -numpy==1.21.6 \ - --hash=sha256:1dbe1c91269f880e364526649a52eff93ac30035507ae980d2fed33aaee633ac \ - --hash=sha256:357768c2e4451ac241465157a3e929b265dfac85d9214074985b1786244f2ef3 \ - --hash=sha256:3820724272f9913b597ccd13a467cc492a0da6b05df26ea09e78b171a0bb9da6 \ - --hash=sha256:4391bd07606be175aafd267ef9bea87cf1b8210c787666ce82073b05f202add1 \ - --hash=sha256:4aa48afdce4660b0076a00d80afa54e8a97cd49f457d68a4342d188a09451c1a \ - --hash=sha256:58459d3bad03343ac4b1b42ed14d571b8743dc80ccbf27444f266729df1d6f5b \ - --hash=sha256:5c3c8def4230e1b959671eb959083661b4a0d2e9af93ee339c7dada6759a9470 \ - --hash=sha256:5f30427731561ce75d7048ac254dbe47a2ba576229250fb60f0fb74db96501a1 \ - --hash=sha256:643843bcc1c50526b3a71cd2ee561cf0d8773f062c8cbaf9ffac9fdf573f83ab \ - --hash=sha256:67c261d6c0a9981820c3a149d255a76918278a6b03b6a036800359aba1256d46 \ - --hash=sha256:67f21981ba2f9d7ba9ade60c9e8cbaa8cf8e9ae51673934480e45cf55e953673 \ - --hash=sha256:6aaf96c7f8cebc220cdfc03f1d5a31952f027dda050e5a703a0d1c396075e3e7 \ - --hash=sha256:7c4068a8c44014b2d55f3c3f574c376b2494ca9cc73d2f1bd692382b6dffe3db \ - --hash=sha256:7c7e5fa88d9ff656e067876e4736379cc962d185d5cd808014a8a928d529ef4e \ - --hash=sha256:7f5ae4f304257569ef3b948810816bc87c9146e8c446053539947eedeaa32786 \ - --hash=sha256:82691fda7c3f77c90e62da69ae60b5ac08e87e775b09813559f8901a88266552 \ - --hash=sha256:8737609c3bbdd48e380d463134a35ffad3b22dc56295eff6f79fd85bd0eeeb25 \ - --hash=sha256:9f411b2c3f3d76bba0865b35a425157c5dcf54937f82bbeb3d3c180789dd66a6 \ - --hash=sha256:a6be4cb0ef3b8c9250c19cc122267263093eee7edd4e3fa75395dfda8c17a8e2 \ - --hash=sha256:bcb238c9c96c00d3085b264e5c1a1207672577b93fa666c3b14a45240b14123a \ - --hash=sha256:bf2ec4b75d0e9356edea834d1de42b31fe11f726a81dfb2c2112bc1eaa508fcf \ - --hash=sha256:d136337ae3cc69aa5e447e78d8e1514be8c3ec9b54264e680cf0b4bd9011574f \ - --hash=sha256:d4bf4d43077db55589ffc9009c0ba0a94fa4908b9586d6ccce2e0b164c86303c \ - --hash=sha256:d6a96eef20f639e6a97d23e57dd0c1b1069a7b4fd7027482a4c5c451cd7732f4 \ - --hash=sha256:d9caa9d5e682102453d96a0ee10c7241b72859b01a941a397fd965f23b3e016b \ - --hash=sha256:dd1c8f6bd65d07d3810b90d02eba7997e32abbdf1277a481d698969e921a3be0 \ - 
--hash=sha256:e31f0bb5928b793169b87e3d1e070f2342b22d5245c755e2b81caa29756246c3 \ - --hash=sha256:ecb55251139706669fdec2ff073c98ef8e9a84473e51e716211b41aa0f18e656 \ - --hash=sha256:ee5ec40fdd06d62fe5d4084bef4fd50fd4bb6bfd2bf519365f569dc470163ab0 \ - --hash=sha256:f17e562de9edf691a42ddb1eb4a5541c20dd3f9e65b09ded2beb0799c0cf29bb \ - --hash=sha256:fdffbfb6832cd0b300995a2b08b8f6fa9f6e856d562800fea9182316d99c4e8e - # via - # -r requirements.in - # pyarrow -packaging==21.3 \ - --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \ - --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522 - # via pytest -pathspec==0.9.0 \ - --hash=sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a \ - --hash=sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1 - # via black -platformdirs==2.5.2 \ - --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \ - --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19 - # via black -pluggy==1.0.0 \ - --hash=sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159 \ - --hash=sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 - # via pytest -py==1.11.0 \ - --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \ - --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 - # via pytest -pyarrow==8.0.0 \ - --hash=sha256:03a10daad957970e914920b793f6a49416699e791f4c827927fd4e4d892a5d16 \ - --hash=sha256:15511ce2f50343f3fd5e9f7c30e4d004da9134e9597e93e9c96c3985928cbe82 \ - --hash=sha256:1dd482ccb07c96188947ad94d7536ab696afde23ad172df8e18944ec79f55055 \ - --hash=sha256:25a5f7c7f36df520b0b7363ba9f51c3070799d4b05d587c60c0adaba57763479 \ - --hash=sha256:3bd201af6e01f475f02be88cf1f6ee9856ab98c11d8bbb6f58347c58cd07be00 \ - --hash=sha256:3fee786259d986f8c046100ced54d63b0c8c9f7cdb7d1bbe07dc69e0f928141c \ - --hash=sha256:42b7982301a9ccd06e1dd4fabd2e8e5df74b93ce4c6b87b81eb9e2d86dc79871 \ - --hash=sha256:4a18a211ed888f1ac0b0ebcb99e2d9a3e913a481120ee9b1fe33d3fedb945d4e \ - --hash=sha256:51e58778fcb8829fca37fbfaea7f208d5ce7ea89ea133dd13d8ce745278ee6f0 \ - --hash=sha256:541e7845ce5f27a861eb5b88ee165d931943347eec17b9ff1e308663531c9647 \ - --hash=sha256:65c7f4cc2be195e3db09296d31a654bb6d8786deebcab00f0e2455fd109d7456 \ - --hash=sha256:69b043a3fce064ebd9fbae6abc30e885680296e5bd5e6f7353e6a87966cf2ad7 \ - --hash=sha256:6ea2c54e6b5ecd64e8299d2abb40770fe83a718f5ddc3825ddd5cd28e352cce1 \ - --hash=sha256:78a6ac39cd793582998dac88ab5c1c1dd1e6503df6672f064f33a21937ec1d8d \ - --hash=sha256:81b87b782a1366279411f7b235deab07c8c016e13f9af9f7c7b0ee564fedcc8f \ - --hash=sha256:8392b9a1e837230090fe916415ed4c3433b2ddb1a798e3f6438303c70fbabcfc \ - --hash=sha256:863be6bad6c53797129610930794a3e797cb7d41c0a30e6794a2ac0e42ce41b8 \ - --hash=sha256:8cd86e04a899bef43e25184f4b934584861d787cf7519851a8c031803d45c6d8 \ - --hash=sha256:95c7822eb37663e073da9892f3499fe28e84f3464711a3e555e0c5463fd53a19 \ - --hash=sha256:98c13b2e28a91b0fbf24b483df54a8d7814c074c2623ecef40dce1fa52f6539b \ - --hash=sha256:ba2b7aa7efb59156b87987a06f5241932914e4d5bbb74a465306b00a6c808849 \ - --hash=sha256:c9c97c8e288847e091dfbcdf8ce51160e638346f51919a9e74fe038b2e8aee62 \ - --hash=sha256:cb06cacc19f3b426681f2f6803cc06ff481e7fe5b3a533b406bc5b2138843d4f \ - --hash=sha256:ce64bc1da3109ef5ab9e4c60316945a7239c798098a631358e9ab39f6e5529e9 \ - --hash=sha256:d5ef4372559b191cafe7db8932801eee252bfc35e983304e7d60b6954576a071 \ - 
--hash=sha256:d6f1e1040413651819074ef5b500835c6c42e6c446532a1ddef8bc5054e8dba5 \ - --hash=sha256:deb400df8f19a90b662babceb6dd12daddda6bb357c216e558b207c0770c7654 \ - --hash=sha256:ea132067ec712d1b1116a841db1c95861508862b21eddbcafefbce8e4b96b867 \ - --hash=sha256:ece333706a94c1221ced8b299042f85fd88b5db802d71be70024433ddf3aecab \ - --hash=sha256:edad25522ad509e534400d6ab98cf1872d30c31bc5e947712bfd57def7af15bb - # via -r requirements.in -pycodestyle==2.8.0 \ - --hash=sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20 \ - --hash=sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f - # via flake8 -pyflakes==2.4.0 \ - --hash=sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c \ - --hash=sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e - # via flake8 -pyparsing==3.0.9 \ - --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \ - --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc - # via packaging -pytest==7.1.2 \ - --hash=sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c \ - --hash=sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45 - # via -r requirements.in -toml==0.10.2 \ - --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \ - --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f - # via -r requirements.in -tomli==2.0.1 \ - --hash=sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc \ - --hash=sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f - # via - # black - # maturin - # mypy - # pytest -typed-ast==1.5.3 \ - --hash=sha256:20d5118e494478ef2d3a2702d964dae830aedd7b4d3b626d003eea526be18718 \ - --hash=sha256:27e46cdd01d6c3a0dd8f728b6a938a6751f7bd324817501c15fb056307f918c6 \ - --hash=sha256:27f25232e2dd0edfe1f019d6bfaaf11e86e657d9bdb7b0956db95f560cceb2b3 \ - --hash=sha256:3042bfc9ca118712c9809201f55355479cfcdc17449f9f8db5e744e9625c6805 \ - --hash=sha256:37e5349d1d5de2f4763d534ccb26809d1c24b180a477659a12c4bde9dd677d74 \ - --hash=sha256:4fff9fdcce59dc61ec1b317bdb319f8f4e6b69ebbe61193ae0a60c5f9333dc49 \ - --hash=sha256:542cd732351ba8235f20faa0fc7398946fe1a57f2cdb289e5497e1e7f48cfedb \ - --hash=sha256:5dc2c11ae59003d4a26dda637222d9ae924387f96acae9492df663843aefad55 \ - --hash=sha256:8831479695eadc8b5ffed06fdfb3e424adc37962a75925668deeb503f446c0a3 \ - --hash=sha256:8cdf91b0c466a6c43f36c1964772918a2c04cfa83df8001ff32a89e357f8eb06 \ - --hash=sha256:8e0b8528838ffd426fea8d18bde4c73bcb4167218998cc8b9ee0a0f2bfe678a6 \ - --hash=sha256:8ef1d96ad05a291f5c36895d86d1375c0ee70595b90f6bb5f5fdbee749b146db \ - --hash=sha256:9ad3b48cf2b487be140072fb86feff36801487d4abb7382bb1929aaac80638ea \ - --hash=sha256:9cc9e1457e1feb06b075c8ef8aeb046a28ec351b1958b42c7c31c989c841403a \ - --hash=sha256:9e237e74fd321a55c90eee9bc5d44be976979ad38a29bbd734148295c1ce7617 \ - --hash=sha256:c9f1a27592fac87daa4e3f16538713d705599b0a27dfe25518b80b6b017f0a6d \ - --hash=sha256:d64dabc6336ddc10373922a146fa2256043b3b43e61f28961caec2a5207c56d5 \ - --hash=sha256:e20d196815eeffb3d76b75223e8ffed124e65ee62097e4e73afb5fec6b993e7a \ - --hash=sha256:e34f9b9e61333ecb0f7d79c21c28aa5cd63bec15cb7e1310d7d3da6ce886bc9b \ - --hash=sha256:ed44e81517364cb5ba367e4f68fca01fba42a7a4690d40c07886586ac267d9b9 \ - --hash=sha256:ee852185964744987609b40aee1d2eb81502ae63ee8eef614558f96a56c1902d \ - --hash=sha256:f60d9de0d087454c91b3999a296d0c4558c1666771e3460621875021bf899af9 \ - 
--hash=sha256:f818c5b81966d4728fec14caa338e30a70dfc3da577984d38f97816c4b3071ec \ - --hash=sha256:fd5df1313915dbd70eaaa88c19030b441742e8b05e6103c631c83b75e0435ccc - # via - # black - # mypy -typing-extensions==4.2.0 \ - --hash=sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708 \ - --hash=sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376 - # via - # black - # importlib-metadata - # mypy -zipp==3.8.0 \ - --hash=sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad \ - --hash=sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099 - # via importlib-metadata diff --git a/ballista-python/requirements.in b/ballista-python/requirements.in deleted file mode 100644 index 7ee6a48dc..000000000 --- a/ballista-python/requirements.in +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -black -flake8 -isort -maturin -mypy -numpy -pyarrow -pytest -toml -importlib_metadata; python_version < "3.8" diff --git a/ballista-python/src/ballista_context.rs b/ballista-python/src/ballista_context.rs deleted file mode 100644 index e3bf21923..000000000 --- a/ballista-python/src/ballista_context.rs +++ /dev/null @@ -1,125 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use pyo3::exceptions::PyValueError; -use pyo3::prelude::*; -use std::path::PathBuf; - -use crate::errors::BallistaError; -use crate::utils::wait_for_future; - -use crate::dataframe::PyDataFrame; -use ballista::prelude::*; -use datafusion::arrow::datatypes::Schema; -use datafusion::prelude::{AvroReadOptions, CsvReadOptions, ParquetReadOptions}; - -/// `PyBallistaContext` is able to plan and execute DataFusion plans. -/// It has a powerful optimizer, a physical planner for local execution, and a -/// multi-threaded execution engine to perform the execution. 
-#[pyclass(name = "BallistaContext", module = "ballista", subclass, unsendable)] -pub(crate) struct PyBallistaContext { - ctx: BallistaContext, -} - -#[pymethods] -impl PyBallistaContext { - #[new] - #[args(port = "50050")] - fn new(py: Python, host: &str, port: u16) -> PyResult { - let config = BallistaConfig::builder() - .set("ballista.shuffle.partitions", "4") - .build() - .map_err(BallistaError::from)?; - - let result = BallistaContext::remote(host, port, &config); - let ctx = wait_for_future(py, result).map_err(BallistaError::from)?; - - Ok(PyBallistaContext { ctx }) - } - - /// Returns a PyDataFrame whose plan corresponds to the SQL statement. - fn sql(&mut self, query: &str, py: Python) -> PyResult { - let ctx = &self.ctx; - - let result = ctx.sql(query); - let df = wait_for_future(py, result).map_err(BallistaError::from)?; - Ok(PyDataFrame::new(df)) - } - - #[allow(clippy::too_many_arguments)] - #[args( - schema = "None", - has_header = "true", - delimiter = "\",\"", - schema_infer_max_records = "1000", - file_extension = "\".csv\"" - )] - fn register_csv( - &mut self, - name: &str, - path: PathBuf, - schema: Option, - has_header: bool, - delimiter: &str, - schema_infer_max_records: usize, - file_extension: &str, - py: Python, - ) -> PyResult<()> { - let ctx = &self.ctx; - - let path = path - .to_str() - .ok_or_else(|| PyValueError::new_err("Unable to convert path to a string"))?; - - let delimiter = delimiter.as_bytes(); - if delimiter.len() != 1 { - return Err(PyValueError::new_err( - "Delimiter must be a single character", - )); - } - - let mut options = CsvReadOptions::new() - .has_header(has_header) - .delimiter(delimiter[0]) - .schema_infer_max_records(schema_infer_max_records) - .file_extension(file_extension); - options.schema = schema.as_ref(); - - let result = ctx.register_csv(name, path, options); - wait_for_future(py, result).map_err(BallistaError::from)?; - - Ok(()) - } - - fn register_avro(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { - let ctx = &self.ctx; - - let result = ctx.register_avro(name, path, AvroReadOptions::default()); - wait_for_future(py, result).map_err(BallistaError::from)?; - - Ok(()) - } - - fn register_parquet(&mut self, name: &str, path: &str, py: Python) -> PyResult<()> { - let ctx = &self.ctx; - - let result = ctx.register_parquet(name, path, ParquetReadOptions::default()); - wait_for_future(py, result).map_err(BallistaError::from)?; - - Ok(()) - } -} diff --git a/ballista-python/src/dataframe.rs b/ballista-python/src/dataframe.rs deleted file mode 100644 index cff1733fb..000000000 --- a/ballista-python/src/dataframe.rs +++ /dev/null @@ -1,163 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
-
-use crate::errors::DataFusionError;
-use crate::expression::PyExpr;
-use crate::utils::wait_for_future;
-use datafusion::arrow::datatypes::Schema;
-use datafusion::arrow::pyarrow::PyArrowConvert;
-use datafusion::arrow::util::pretty;
-use datafusion::dataframe::DataFrame;
-use datafusion::logical_plan::JoinType;
-use pyo3::exceptions::PyTypeError;
-use pyo3::prelude::*;
-use pyo3::types::PyTuple;
-use std::sync::Arc;
-
-/// A PyDataFrame is a representation of a logical plan and an API to compose statements.
-/// Use it to build a plan and `.collect()` to execute the plan and collect the result.
-/// The actual execution of a plan runs natively on Rust and Arrow on a multi-threaded environment.
-#[pyclass(name = "DataFrame", module = "ballista", subclass)]
-#[derive(Clone)]
-pub(crate) struct PyDataFrame {
-    df: Arc<DataFrame>,
-}
-
-impl PyDataFrame {
-    /// creates a new PyDataFrame
-    pub fn new(df: Arc<DataFrame>) -> Self {
-        Self { df }
-    }
-}
-
-#[pymethods]
-impl PyDataFrame {
-    fn __getitem__(&self, key: PyObject) -> PyResult<Self> {
-        Python::with_gil(|py| {
-            if let Ok(key) = key.extract::<&str>(py) {
-                self.select_columns(vec![key])
-            } else if let Ok(tuple) = key.extract::<&PyTuple>(py) {
-                let keys = tuple
-                    .iter()
-                    .map(|item| item.extract::<&str>())
-                    .collect::<PyResult<Vec<&str>>>()?;
-                self.select_columns(keys)
-            } else if let Ok(keys) = key.extract::<Vec<&str>>(py) {
-                self.select_columns(keys)
-            } else {
-                let message = "DataFrame can only be indexed by string index or indices";
-                Err(PyTypeError::new_err(message))
-            }
-        })
-    }
-
-    /// Returns the schema from the logical plan
-    fn schema(&self) -> Schema {
-        self.df.schema().into()
-    }
-
-    #[args(args = "*")]
-    fn select_columns(&self, args: Vec<&str>) -> PyResult<Self> {
-        let df = self.df.select_columns(&args)?;
-        Ok(Self::new(df))
-    }
-
-    #[args(args = "*")]
-    fn select(&self, args: Vec<PyExpr>) -> PyResult<Self> {
-        let expr = args.into_iter().map(|e| e.into()).collect();
-        let df = self.df.select(expr)?;
-        Ok(Self::new(df))
-    }
-
-    fn filter(&self, predicate: PyExpr) -> PyResult<Self> {
-        let df = self.df.filter(predicate.into())?;
-        Ok(Self::new(df))
-    }
-
-    fn aggregate(&self, group_by: Vec<PyExpr>, aggs: Vec<PyExpr>) -> PyResult<Self> {
-        let group_by = group_by.into_iter().map(|e| e.into()).collect();
-        let aggs = aggs.into_iter().map(|e| e.into()).collect();
-        let df = self.df.aggregate(group_by, aggs)?;
-        Ok(Self::new(df))
-    }
-
-    #[args(exprs = "*")]
-    fn sort(&self, exprs: Vec<PyExpr>) -> PyResult<Self> {
-        let exprs = exprs.into_iter().map(|e| e.into()).collect();
-        let df = self.df.sort(exprs)?;
-        Ok(Self::new(df))
-    }
-
-    fn limit(&self, count: usize) -> PyResult<Self> {
-        let df = self.df.limit(count)?;
-        Ok(Self::new(df))
-    }
-
-    /// Executes the plan, returning a list of `RecordBatch`es.
-    /// Unless some order is specified in the plan, there is no
-    /// guarantee of the order of the result.
-    fn collect(&self, py: Python) -> PyResult<Vec<PyObject>> {
-        let batches = wait_for_future(py, self.df.collect())?;
-        // cannot use PyResult<Vec<RecordBatch>> return type due to
-        // https://github.com/PyO3/pyo3/issues/1813
-        batches.into_iter().map(|rb| rb.to_pyarrow(py)).collect()
-    }
-
-    /// Print the result, 20 lines by default
-    #[args(num = "20")]
-    fn show(&self, py: Python, num: usize) -> PyResult<()> {
-        let df = self.df.limit(num)?;
-        let batches = wait_for_future(py, df.collect())?;
-        Ok(pretty::print_batches(&batches)?)
-    }
-
-    fn join(
-        &self,
-        right: PyDataFrame,
-        join_keys: (Vec<&str>, Vec<&str>),
-        how: &str,
-    ) -> PyResult<Self> {
-        let join_type = match how {
-            "inner" => JoinType::Inner,
-            "left" => JoinType::Left,
-            "right" => JoinType::Right,
-            "full" => JoinType::Full,
-            "semi" => JoinType::Semi,
-            "anti" => JoinType::Anti,
-            how => {
-                return Err(DataFusionError::Common(format!(
-                    "The join type {} does not exist or is not implemented",
-                    how
-                ))
-                .into())
-            }
-        };
-
-        let df = self
-            .df
-            .join(right.df, join_type, &join_keys.0, &join_keys.1, None)?;
-        Ok(Self::new(df))
-    }
-
-    /// Print the query plan
-    #[args(verbose = false, analyze = false)]
-    fn explain(&self, py: Python, verbose: bool, analyze: bool) -> PyResult<()> {
-        let df = self.df.explain(verbose, analyze)?;
-        let batches = wait_for_future(py, df.collect())?;
-        Ok(pretty::print_batches(&batches)?)
-    }
-}
diff --git a/ballista-python/src/errors.rs b/ballista-python/src/errors.rs
deleted file mode 100644
index 7aecbed90..000000000
--- a/ballista-python/src/errors.rs
+++ /dev/null
@@ -1,95 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use core::fmt;
-
-use ballista::prelude::BallistaError as InnerBallistaError;
-use datafusion::arrow::error::ArrowError;
-use datafusion::error::DataFusionError as InnerDataFusionError;
-use pyo3::{exceptions::PyException, PyErr};
-
-#[derive(Debug)]
-pub enum DataFusionError {
-    ExecutionError(InnerDataFusionError),
-    ArrowError(ArrowError),
-    Common(String),
-}
-
-impl fmt::Display for DataFusionError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            DataFusionError::ExecutionError(e) => write!(f, "DataFusion error: {:?}", e),
-            DataFusionError::ArrowError(e) => write!(f, "Arrow error: {:?}", e),
-            DataFusionError::Common(e) => write!(f, "{}", e),
-        }
-    }
-}
-
-impl From<ArrowError> for DataFusionError {
-    fn from(err: ArrowError) -> DataFusionError {
-        DataFusionError::ArrowError(err)
-    }
-}
-
-impl From<InnerDataFusionError> for DataFusionError {
-    fn from(err: InnerDataFusionError) -> DataFusionError {
-        DataFusionError::ExecutionError(err)
-    }
-}
-
-impl From<DataFusionError> for PyErr {
-    fn from(err: DataFusionError) -> PyErr {
-        PyException::new_err(err.to_string())
-    }
-}
-
-impl From<InnerDataFusionError> for BallistaError {
-    fn from(err: InnerDataFusionError) -> BallistaError {
-        BallistaError::DataFusionExecutionError(err)
-    }
-}
-
-impl From<InnerBallistaError> for BallistaError {
-    fn from(err: InnerBallistaError) -> BallistaError {
-        BallistaError::ExecutionError(err)
-    }
-}
-
-impl fmt::Display for BallistaError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            BallistaError::DataFusionExecutionError(e) => write!(f, "Datafusion error: {:?}", e),
-            BallistaError::ExecutionError(e) => write!(f, "Ballista error: {:?}", e),
-            BallistaError::ArrowError(e) => write!(f, "Arrow error: {:?}", e),
-            BallistaError::Common(e) => write!(f, "{}", e),
-        }
-    }
-}
-
-#[derive(Debug)]
-pub enum BallistaError {
-    DataFusionExecutionError(InnerDataFusionError),
-    ExecutionError(InnerBallistaError),
-    ArrowError(ArrowError),
-    Common(String),
-}
-
-impl From<BallistaError> for PyErr {
-    fn from(err: BallistaError) -> PyErr {
-        PyException::new_err(err.to_string())
-    }
-}
diff --git a/ballista-python/src/expression.rs b/ballista-python/src/expression.rs
deleted file mode 100644
index b3275ccf0..000000000
--- a/ballista-python/src/expression.rs
+++ /dev/null
@@ -1,137 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use pyo3::{basic::CompareOp, prelude::*};
-use std::convert::{From, Into};
-
-use datafusion::arrow::datatypes::DataType;
-use datafusion::logical_plan::{col, lit, Expr};
-
-use datafusion::scalar::ScalarValue;
-
-/// A PyExpr that can be used on a DataFrame
-#[pyclass(name = "Expression", module = "ballista", subclass)]
-#[derive(Debug, Clone)]
-pub(crate) struct PyExpr {
-    pub(crate) expr: Expr,
-}
-
-impl From<PyExpr> for Expr {
-    fn from(expr: PyExpr) -> Expr {
-        expr.expr
-    }
-}
-
-impl From<Expr> for PyExpr {
-    fn from(expr: Expr) -> PyExpr {
-        PyExpr { expr }
-    }
-}
-
-#[pymethods]
-impl PyExpr {
-    fn __richcmp__(&self, other: PyExpr, op: CompareOp) -> PyExpr {
-        let expr = match op {
-            CompareOp::Lt => self.expr.clone().lt(other.expr),
-            CompareOp::Le => self.expr.clone().lt_eq(other.expr),
-            CompareOp::Eq => self.expr.clone().eq(other.expr),
-            CompareOp::Ne => self.expr.clone().not_eq(other.expr),
-            CompareOp::Gt => self.expr.clone().gt(other.expr),
-            CompareOp::Ge => self.expr.clone().gt_eq(other.expr),
-        };
-        expr.into()
-    }
-
-    fn __str__(&self) -> PyResult<String> {
-        Ok(format!("{}", self.expr))
-    }
-
-    fn __add__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok((self.expr.clone() + rhs.expr).into())
-    }
-
-    fn __sub__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok((self.expr.clone() - rhs.expr).into())
-    }
-
-    fn __truediv__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok((self.expr.clone() / rhs.expr).into())
-    }
-
-    fn __mul__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok((self.expr.clone() * rhs.expr).into())
-    }
-
-    fn __mod__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok(self.expr.clone().modulus(rhs.expr).into())
-    }
-
-    fn __and__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok(self.expr.clone().and(rhs.expr).into())
-    }
-
-    fn __or__(&self, rhs: PyExpr) -> PyResult<PyExpr> {
-        Ok(self.expr.clone().or(rhs.expr).into())
-    }
-
-    fn __invert__(&self) -> PyResult<PyExpr> {
-        Ok(self.expr.clone().not().into())
-    }
-
-    fn __getitem__(&self, key: &str) -> PyResult<PyExpr> {
-        Ok(Expr::GetIndexedField {
-            expr: Box::new(self.expr.clone()),
-            key: ScalarValue::Utf8(Some(key.to_string())),
-        }
-        .into())
-    }
-
-    #[staticmethod]
-    pub fn literal(value: ScalarValue) -> PyExpr {
-        lit(value).into()
-    }
-
-    #[staticmethod]
-    pub fn column(value: &str) -> PyExpr {
-        col(value).into()
-    }
-
-    /// assign a name to the PyExpr
-    pub fn alias(&self, name: &str) -> PyExpr {
-        self.expr.clone().alias(name).into()
-    }
-
-    /// Create a sort PyExpr from an existing PyExpr.
-    #[args(ascending = true, nulls_first = true)]
-    pub fn sort(&self, ascending: bool, nulls_first: bool) -> PyExpr {
-        self.expr.clone().sort(ascending, nulls_first).into()
-    }
-
-    pub fn is_null(&self) -> PyExpr {
-        self.expr.clone().is_null().into()
-    }
-
-    pub fn cast(&self, to: DataType) -> PyExpr {
-        // self.expr.cast_to() requires DFSchema to validate that the cast
-        // is supported, omit that for now
-        let expr = Expr::Cast {
-            expr: Box::new(self.expr.clone()),
-            data_type: to,
-        };
-        expr.into()
-    }
-}
diff --git a/ballista-python/src/functions.rs b/ballista-python/src/functions.rs
deleted file mode 100644
index 1a2c4ed73..000000000
--- a/ballista-python/src/functions.rs
+++ /dev/null
@@ -1,338 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use pyo3::{prelude::*, wrap_pyfunction};
-
-use datafusion::logical_plan;
-use datafusion::physical_plan::aggregates::AggregateFunction;
-use datafusion_expr::BuiltinScalarFunction;
-
-use crate::errors;
-use crate::expression::PyExpr;
-
-#[pyfunction]
-fn array(value: Vec<PyExpr>) -> PyExpr {
-    PyExpr {
-        expr: logical_plan::array(value.into_iter().map(|x| x.expr).collect::<Vec<_>>()),
-    }
-}
-
-#[pyfunction]
-fn in_list(expr: PyExpr, value: Vec<PyExpr>, negated: bool) -> PyExpr {
-    logical_plan::in_list(
-        expr.expr,
-        value.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
-        negated,
-    )
-    .into()
-}
-
-/// Current date and time
-#[pyfunction]
-fn now() -> PyExpr {
-    PyExpr {
-        // here lit(0) is a stub to conform to arity
-        expr: logical_plan::now(logical_plan::lit(0)),
-    }
-}
-
-/// Returns a random value in the range 0.0 <= x < 1.0
-#[pyfunction]
-fn random() -> PyExpr {
-    PyExpr {
-        expr: logical_plan::random(),
-    }
-}
-
-/// Computes a binary hash of the given data. method is the algorithm to use.
-/// Standard algorithms are md5, sha224, sha256, sha384, sha512, blake2s, blake2b, and blake3.
-#[pyfunction(value, method)]
-fn digest(value: PyExpr, method: PyExpr) -> PyExpr {
-    PyExpr {
-        expr: logical_plan::digest(value.expr, method.expr),
-    }
-}
-
-/// Concatenates the text representations of all the arguments.
-/// NULL arguments are ignored.
-#[pyfunction(args = "*")]
-fn concat(args: Vec<PyExpr>) -> PyResult<PyExpr> {
-    let args = args.into_iter().map(|e| e.expr).collect::<Vec<_>>();
-    Ok(logical_plan::concat(&args).into())
-}
-
-/// Concatenates all but the first argument, with separators.
-/// The first argument is used as the separator string, and should not be NULL.
-/// Other NULL arguments are ignored.
-#[pyfunction(sep, args = "*")]
-fn concat_ws(sep: String, args: Vec<PyExpr>) -> PyResult<PyExpr> {
-    let args = args.into_iter().map(|e| e.expr).collect::<Vec<_>>();
-    Ok(logical_plan::concat_ws(sep, &args).into())
-}
-
-/// Creates a new Sort expression
-#[pyfunction]
-fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyResult<PyExpr> {
-    Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::Sort {
-            expr: Box::new(expr.expr),
-            asc: asc.unwrap_or(true),
-            nulls_first: nulls_first.unwrap_or(true),
-        },
-    })
-}
-
-/// Creates a new Alias expression
-#[pyfunction]
-fn alias(expr: PyExpr, name: &str) -> PyResult<PyExpr> {
-    Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)),
-    })
-}
-
-/// Creates a new Window function expression
-#[pyfunction]
-fn window(
-    name: &str,
-    args: Vec<PyExpr>,
-    partition_by: Option<Vec<PyExpr>>,
-    order_by: Option<Vec<PyExpr>>,
-) -> PyResult<PyExpr> {
-    use std::str::FromStr;
-    let fun = datafusion_expr::window_function::WindowFunction::from_str(name)
-        .map_err(|e| -> errors::DataFusionError { e.into() })?;
-    Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::WindowFunction {
-            fun,
-            args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
-            partition_by: partition_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.expr)
-                .collect::<Vec<_>>(),
-            order_by: order_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.expr)
-                .collect::<Vec<_>>(),
-            window_frame: None,
-        },
-    })
-}
-
-macro_rules! scalar_function {
-    ($NAME: ident, $FUNC: ident) => {
-        scalar_function!($NAME, $FUNC, stringify!($NAME));
-    };
-    ($NAME: ident, $FUNC: ident, $DOC: expr) => {
-        #[doc = $DOC]
-        #[pyfunction(args = "*")]
-        fn $NAME(args: Vec<PyExpr>) -> PyExpr {
-            let expr = logical_plan::Expr::ScalarFunction {
-                fun: BuiltinScalarFunction::$FUNC,
-                args: args.into_iter().map(|e| e.into()).collect(),
-            };
-            expr.into()
-        }
-    };
-}
-
-macro_rules! aggregate_function {
-    ($NAME: ident, $FUNC: ident) => {
-        aggregate_function!($NAME, $FUNC, stringify!($NAME));
-    };
-    ($NAME: ident, $FUNC: ident, $DOC: expr) => {
-        #[doc = $DOC]
-        #[pyfunction(args = "*", distinct = "false")]
-        fn $NAME(args: Vec<PyExpr>, distinct: bool) -> PyExpr {
-            let expr = logical_plan::Expr::AggregateFunction {
-                fun: AggregateFunction::$FUNC,
-                args: args.into_iter().map(|e| e.into()).collect(),
-                distinct,
-            };
-            expr.into()
-        }
-    };
-}
-
-scalar_function!(abs, Abs);
-scalar_function!(acos, Acos);
-scalar_function!(ascii, Ascii, "Returns the numeric code of the first character of the argument. In UTF8 encoding, returns the Unicode code point of the character. In other multibyte encodings, the argument must be an ASCII character.");
-scalar_function!(asin, Asin);
-scalar_function!(atan, Atan);
-scalar_function!(
-    bit_length,
-    BitLength,
-    "Returns number of bits in the string (8 times the octet_length)."
-);
-scalar_function!(btrim, Btrim, "Removes the longest string containing only characters in characters (a space by default) from the start and end of string.");
-scalar_function!(ceil, Ceil);
-scalar_function!(
-    character_length,
-    CharacterLength,
-    "Returns number of characters in the string."
-);
-scalar_function!(chr, Chr, "Returns the character with the given code.");
-scalar_function!(cos, Cos);
-scalar_function!(exp, Exp);
-scalar_function!(floor, Floor);
-scalar_function!(initcap, InitCap, "Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.");
-scalar_function!(left, Left, "Returns first n characters in the string, or when n is negative, returns all but last |n| characters.");
-scalar_function!(ln, Ln);
-scalar_function!(log10, Log10);
-scalar_function!(log2, Log2);
-scalar_function!(lower, Lower, "Converts the string to all lower case");
-scalar_function!(lpad, Lpad, "Extends the string to length length by prepending the characters fill (a space by default). If the string is already longer than length then it is truncated (on the right).");
-scalar_function!(ltrim, Ltrim, "Removes the longest string containing only characters in characters (a space by default) from the start of string.");
-scalar_function!(
-    md5,
-    MD5,
-    "Computes the MD5 hash of the argument, with the result written in hexadecimal."
-);
-scalar_function!(octet_length, OctetLength, "Returns number of bytes in the string. Since this version of the function accepts type character directly, it will not strip trailing spaces.");
-scalar_function!(regexp_match, RegexpMatch);
-scalar_function!(
-    regexp_replace,
-    RegexpReplace,
-    "Replaces substring(s) matching a POSIX regular expression"
-);
-scalar_function!(
-    repeat,
-    Repeat,
-    "Repeats string the specified number of times."
-);
-scalar_function!(
-    replace,
-    Replace,
-    "Replaces all occurrences in string of substring from with substring to."
-);
-scalar_function!(
-    reverse,
-    Reverse,
-    "Reverses the order of the characters in the string."
-);
-scalar_function!(right, Right, "Returns last n characters in the string, or when n is negative, returns all but first |n| characters.");
-scalar_function!(round, Round);
-scalar_function!(rpad, Rpad, "Extends the string to length length by appending the characters fill (a space by default). If the string is already longer than length then it is truncated.");
-scalar_function!(rtrim, Rtrim, "Removes the longest string containing only characters in characters (a space by default) from the end of string.");
-scalar_function!(sha224, SHA224);
-scalar_function!(sha256, SHA256);
-scalar_function!(sha384, SHA384);
-scalar_function!(sha512, SHA512);
-scalar_function!(signum, Signum);
-scalar_function!(sin, Sin);
-scalar_function!(
-    split_part,
-    SplitPart,
-    "Splits string at occurrences of delimiter and returns the n'th field (counting from one)."
-);
-scalar_function!(sqrt, Sqrt);
-scalar_function!(
-    starts_with,
-    StartsWith,
-    "Returns true if string starts with prefix."
-);
-scalar_function!(strpos, Strpos, "Returns starting index of specified substring within string, or zero if it's not present. (Same as position(substring in string), but note the reversed argument order.)");
-scalar_function!(substr, Substr);
-scalar_function!(tan, Tan);
-scalar_function!(
-    to_hex,
-    ToHex,
-    "Converts the number to its equivalent hexadecimal representation."
-);
-scalar_function!(to_timestamp, ToTimestamp);
-scalar_function!(translate, Translate, "Replaces each character in string that matches a character in the from set with the corresponding character in the to set. If from is longer than to, occurrences of the extra characters in from are deleted.");
-scalar_function!(trim, Trim, "Removes the longest string containing only characters in characters (a space by default) from the start, end, or both ends (BOTH is the default) of string.");
-scalar_function!(trunc, Trunc);
-scalar_function!(upper, Upper, "Converts the string to all upper case.");
-
-aggregate_function!(avg, Avg);
-aggregate_function!(count, Count);
-aggregate_function!(max, Max);
-aggregate_function!(min, Min);
-aggregate_function!(sum, Sum);
-aggregate_function!(approx_distinct, ApproxDistinct);
-
-pub(crate) fn init_module(m: &PyModule) -> PyResult<()> {
-    m.add_wrapped(wrap_pyfunction!(abs))?;
-    m.add_wrapped(wrap_pyfunction!(acos))?;
-    m.add_wrapped(wrap_pyfunction!(approx_distinct))?;
-    m.add_wrapped(wrap_pyfunction!(alias))?;
-    m.add_wrapped(wrap_pyfunction!(array))?;
-    m.add_wrapped(wrap_pyfunction!(ascii))?;
-    m.add_wrapped(wrap_pyfunction!(asin))?;
-    m.add_wrapped(wrap_pyfunction!(atan))?;
-    m.add_wrapped(wrap_pyfunction!(avg))?;
-    m.add_wrapped(wrap_pyfunction!(bit_length))?;
-    m.add_wrapped(wrap_pyfunction!(btrim))?;
-    m.add_wrapped(wrap_pyfunction!(ceil))?;
-    m.add_wrapped(wrap_pyfunction!(character_length))?;
-    m.add_wrapped(wrap_pyfunction!(chr))?;
-    m.add_wrapped(wrap_pyfunction!(concat_ws))?;
-    m.add_wrapped(wrap_pyfunction!(concat))?;
-    m.add_wrapped(wrap_pyfunction!(cos))?;
-    m.add_wrapped(wrap_pyfunction!(count))?;
-    m.add_wrapped(wrap_pyfunction!(digest))?;
-    m.add_wrapped(wrap_pyfunction!(exp))?;
-    m.add_wrapped(wrap_pyfunction!(floor))?;
-    m.add_wrapped(wrap_pyfunction!(in_list))?;
-    m.add_wrapped(wrap_pyfunction!(initcap))?;
-    m.add_wrapped(wrap_pyfunction!(left))?;
-    m.add_wrapped(wrap_pyfunction!(ln))?;
-    m.add_wrapped(wrap_pyfunction!(log10))?;
-    m.add_wrapped(wrap_pyfunction!(log2))?;
-    m.add_wrapped(wrap_pyfunction!(lower))?;
-    m.add_wrapped(wrap_pyfunction!(lpad))?;
-    m.add_wrapped(wrap_pyfunction!(ltrim))?;
-    m.add_wrapped(wrap_pyfunction!(max))?;
-    m.add_wrapped(wrap_pyfunction!(md5))?;
-    m.add_wrapped(wrap_pyfunction!(min))?;
-    m.add_wrapped(wrap_pyfunction!(now))?;
-    m.add_wrapped(wrap_pyfunction!(octet_length))?;
-    m.add_wrapped(wrap_pyfunction!(order_by))?;
-    m.add_wrapped(wrap_pyfunction!(random))?;
-    m.add_wrapped(wrap_pyfunction!(regexp_match))?;
-    m.add_wrapped(wrap_pyfunction!(regexp_replace))?;
-    m.add_wrapped(wrap_pyfunction!(repeat))?;
-    m.add_wrapped(wrap_pyfunction!(replace))?;
-    m.add_wrapped(wrap_pyfunction!(reverse))?;
-    m.add_wrapped(wrap_pyfunction!(right))?;
-    m.add_wrapped(wrap_pyfunction!(round))?;
-    m.add_wrapped(wrap_pyfunction!(rpad))?;
-    m.add_wrapped(wrap_pyfunction!(rtrim))?;
-    m.add_wrapped(wrap_pyfunction!(sha224))?;
-    m.add_wrapped(wrap_pyfunction!(sha256))?;
-    m.add_wrapped(wrap_pyfunction!(sha384))?;
-    m.add_wrapped(wrap_pyfunction!(sha512))?;
-    m.add_wrapped(wrap_pyfunction!(signum))?;
-    m.add_wrapped(wrap_pyfunction!(sin))?;
-    m.add_wrapped(wrap_pyfunction!(split_part))?;
-    m.add_wrapped(wrap_pyfunction!(sqrt))?;
-    m.add_wrapped(wrap_pyfunction!(starts_with))?;
-    m.add_wrapped(wrap_pyfunction!(strpos))?;
-    m.add_wrapped(wrap_pyfunction!(substr))?;
-    m.add_wrapped(wrap_pyfunction!(sum))?;
-    m.add_wrapped(wrap_pyfunction!(tan))?;
-    m.add_wrapped(wrap_pyfunction!(to_hex))?;
-    m.add_wrapped(wrap_pyfunction!(to_timestamp))?;
-    m.add_wrapped(wrap_pyfunction!(translate))?;
-    m.add_wrapped(wrap_pyfunction!(trim))?;
-    m.add_wrapped(wrap_pyfunction!(trunc))?;
-    m.add_wrapped(wrap_pyfunction!(upper))?;
-    m.add_wrapped(wrap_pyfunction!(window))?;
-    Ok(())
-}
diff --git a/ballista-python/src/lib.rs b/ballista-python/src/lib.rs
deleted file mode 100644
index 70400f3f9..000000000
--- a/ballista-python/src/lib.rs
+++ /dev/null
@@ -1,48 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use mimalloc::MiMalloc;
-use pyo3::prelude::*;
-
-mod ballista_context;
-mod dataframe;
-pub mod errors;
-mod expression;
-mod functions;
-pub mod utils;
-
-#[global_allocator]
-static GLOBAL: MiMalloc = MiMalloc;
-
-/// Low-level DataFusion internal package.
-///
-/// The higher-level public API is defined in pure python files under the
-/// datafusion directory.
-#[pymodule]
-fn _internal(py: Python, m: &PyModule) -> PyResult<()> {
-    // Register the python classes
-    m.add_class::<ballista_context::PyBallistaContext>()?;
-    m.add_class::<dataframe::PyDataFrame>()?;
-    m.add_class::<expression::PyExpr>()?;
-
-    // Register the functions as a submodule
-    let funcs = PyModule::new(py, "functions")?;
-    functions::init_module(funcs)?;
-    m.add_submodule(funcs)?;
-
-    Ok(())
-}
diff --git a/ballista-python/src/utils.rs b/ballista-python/src/utils.rs
deleted file mode 100644
index 795058bc9..000000000
--- a/ballista-python/src/utils.rs
+++ /dev/null
@@ -1,30 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use pyo3::prelude::*;
-use std::future::Future;
-use tokio::runtime::Runtime;
-
-/// Utility to collect rust futures with GIL released
-pub fn wait_for_future<F: Future>(py: Python, f: F) -> F::Output
-where
-    F: Send,
-    F::Output: Send,
-{
-    let rt = Runtime::new().unwrap();
-    py.allow_threads(|| rt.block_on(f))
-}