
Commit f36a411

Merge branch 'add-pypi-release-workflow' of github.com:ayushdg/dask-sql into add-pypi-release-workflow

ayushdg committed Oct 18, 2022
2 parents d97e3c2 + 6d02618
Showing 56 changed files with 433 additions and 326 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/rust.yml
@@ -16,9 +16,25 @@ env:
RUSTFLAGS: "-C debuginfo=1"

jobs:
detect-ci-trigger:
name: Check for upstream trigger phrase
runs-on: ubuntu-latest
if: github.repository == 'dask-contrib/dask-sql'
outputs:
triggered: ${{ steps.detect-trigger.outputs.trigger-found }}
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 2
- uses: xarray-contrib/[email protected]
id: detect-trigger
with:
keyword: "[test-df-upstream]"

# Check crate compiles
linux-build-lib:
name: cargo check
needs: [detect-ci-trigger]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -27,6 +43,11 @@ jobs:
with:
path: /home/runner/.cargo
key: cargo-cache
- name: Optionally update upstream dependencies
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
cd dask_planner
bash update-dependencies.sh
- name: Check workspace in debug mode
run: |
cd dask_planner
@@ -39,6 +60,7 @@
# test the crate
linux-test:
name: cargo test
needs: [detect-ci-trigger]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
@@ -49,6 +71,11 @@
with:
path: /home/runner/.cargo
key: cargo-cache
- name: Optionally update upstream dependencies
if: needs.detect-ci-trigger.outputs.triggered == 'true'
run: |
cd dask_planner
bash update-dependencies.sh
- name: Run tests
run: |
cd dask_planner
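For context, `ci-trigger` matches the keyword against the head commit message (hence `fetch-depth: 2`); a hypothetical opt-in commit could look like this:

```bash
# Illustrative only: including the phrase makes detect-ci-trigger emit
# triggered=true, which the jobs that `needs` it check before updating
# upstream dependencies.
git commit --allow-empty -m "Check against upstream DataFusion [test-df-upstream]"
git push
```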
4 changes: 4 additions & 0 deletions .github/workflows/style.yml
@@ -15,4 +15,8 @@ jobs:
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- uses: actions-rs/toolchain@v1
with:
toolchain: nightly
components: rustfmt
- uses: pre-commit/[email protected]
41 changes: 38 additions & 3 deletions .github/workflows/test-upstream.yml
@@ -1,14 +1,29 @@
name: Nightly upstream testing
on:
schedule:
- cron: "0 0 * * *" # Daily “At 00:00” UTC
- cron: "0 0 * * *" # Daily “At 00:00” UTC for upstream dask testing
- cron: "0 3 * * *" # Daily "At 03:00" UTC for upstream datafusion testing
workflow_dispatch: # allows you to trigger the workflow run manually

inputs:
upstreamLib:
type: choice
description: 'Library to update for upstream testing'
required: false
default: 'Dask'
options:
- Dask
- DataFusion
# Required shell entrypoint to have properly activated conda environments
defaults:
run:
shell: bash -l {0}

env:
which_upstream: |
(github.event.schedule == '0 3 * * *' && 'DataFusion')
|| (github.event.schedule == '0 0 * * *' && 'Dask')
|| (github.event.inputs.upstreamLib)
jobs:
test-dev:
name: "Test upstream dev (${{ matrix.os }}, python: ${{ matrix.python }})"
@@ -38,6 +53,11 @@ jobs:
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
environment-file: ${{ env.CONDA_FILE }}
- name: Optionally update upstream cargo dependencies
if: env.which_upstream == 'DataFusion'
run: |
cd dask_planner
bash update-dependencies.sh
- name: Build the Rust DataFusion bindings
run: |
python setup.py build install
@@ -48,6 +68,7 @@
docker pull bde2020/hive:2.3.2-postgresql-metastore
docker pull bde2020/hive-metastore-postgresql:2.3.0
- name: Install upstream dev Dask / dask-ml
if: env.which_upstream == 'Dask'
run: |
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
@@ -70,6 +91,11 @@
channels: dask/label/dev,conda-forge,nodefaults
activate-environment: dask-sql
environment-file: continuous_integration/environment-3.9-dev.yaml
- name: Optionally update upstream cargo dependencies
if: env.which_upstream == 'DataFusion'
run: |
cd dask_planner
bash update-dependencies.sh
- name: Build the Rust DataFusion bindings
run: |
python setup.py build install
@@ -81,6 +107,7 @@
pip list
mamba list
- name: Install upstream dev dask-ml
if: env.which_upstream == 'Dask'
run: |
mamba update dask
python -m pip install --no-deps git+https://github.com/dask/dask-ml
@@ -109,6 +136,13 @@ jobs:
mamba-version: "*"
channels: conda-forge,nodefaults
channel-priority: strict
- name: Optionally update upstream cargo dependencies
if: env.which_upstream == 'DataFusion'
env:
UPDATE_ALL_CARGO_DEPS: false
run: |
cd dask_planner
bash update-dependencies.sh
- name: Install dependencies and nothing else
run: |
mamba install setuptools-rust
@@ -118,6 +152,7 @@
pip list
mamba list
- name: Install upstream dev Dask / dask-ml
if: env.which_upstream == 'Dask'
run: |
python -m pip install --no-deps git+https://github.com/dask/dask
python -m pip install --no-deps git+https://github.com/dask/distributed
@@ -142,7 +177,7 @@ jobs:
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
const title = "⚠️ Upstream CI failed ⚠️"
const title = "⚠️ Upstream CI ${{ env.which_upstream }} failed ⚠️"
const workflow_url = `https://github.com/${process.env.GITHUB_REPOSITORY}/actions/runs/${process.env.GITHUB_RUN_ID}`
const issue_body = `[Workflow Run URL](${workflow_url})`
// Run GraphQL query against GitHub API to find the most recent open issue used for reporting failures
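As a usage sketch (the `gh` invocation below is an assumption, not part of this diff), the new `workflow_dispatch` input can be supplied from the GitHub CLI:

```bash
# Hypothetical manual trigger selecting which upstream library to test.
gh workflow run test-upstream.yml -f upstreamLib=DataFusion
```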
11 changes: 9 additions & 2 deletions .pre-commit-config.yaml
@@ -19,8 +19,6 @@ repos:
- repo: https://github.com/doublify/pre-commit-rust
rev: v1.0
hooks:
- id: fmt
args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
- id: cargo-check
args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
- id: clippy
@@ -33,3 +31,12 @@ repos:
- id: check-yaml
exclude: ^continuous_integration/recipe/
- id: check-added-large-files
- repo: local
hooks:
- id: cargo-fmt
name: cargo fmt
description: Format files with cargo fmt.
entry: cargo +nightly fmt
language: system
types: [rust]
args: ['--manifest-path', './dask_planner/Cargo.toml', '--verbose', '--']
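A quick way to exercise the new local hook (standard `pre-commit` CLI; the hook id comes from the config above):

```bash
# Run only the cargo-fmt hook repo-wide; assumes nightly rustfmt is
# installed, per the CONTRIBUTING.md change below.
pre-commit run cargo-fmt --all-files
```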
7 changes: 5 additions & 2 deletions CONTRIBUTING.md
@@ -4,12 +4,15 @@

The environment used for development and CI consists of:

- a system installation of [`rustup`](https://rustup.rs/)
- a system installation of [`rustup`](https://rustup.rs/) with:
- the latest stable toolchain
- the latest nightly `rustfmt`
- a [conda](https://docs.conda.io/projects/conda/en/latest/user-guide/install/index.html) environment containing all required Python packages

Once `rustup` is installed, ensure that the latest stable toolchain is available by running
Once `rustup` is installed, ensure that the latest stable toolchain and nightly `rustfmt` are available by running

```
rustup toolchain install nightly -c rustfmt --profile minimal
rustup update
```

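A minimal sanity check of the setup described above (output varies by machine):

```bash
# Confirm the stable toolchain is current and nightly rustfmt is present.
rustup show
cargo +nightly fmt --version
```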
2 changes: 1 addition & 1 deletion continuous_integration/gpuci/build.sh
@@ -11,7 +11,7 @@ function hasArg {
}

# Set path and build parallel level
export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PATH=/opt/cargo/bin:/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}

# Set home to the job's workspace
8 changes: 4 additions & 4 deletions dask_planner/Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions dask_planner/src/dialect.rs
@@ -1,5 +1,5 @@
use core::iter::Peekable;
use core::str::Chars;
use core::{iter::Peekable, str::Chars};

use datafusion_sql::sqlparser::dialect::Dialect;

#[derive(Debug)]
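The import reshuffles in this and the following Rust files match what nightly rustfmt produces with crate-level import merging and std/external/local grouping; one way to reproduce the style, assuming no conflicting `rustfmt.toml`, would be:

```bash
# Hypothetical invocation; the repository's actual rustfmt configuration
# may differ. Both --config options are nightly-only, hence +nightly.
cargo +nightly fmt --manifest-path dask_planner/Cargo.toml -- \
  --config imports_granularity=Crate,group_imports=StdExternalCrate
```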
6 changes: 3 additions & 3 deletions dask_planner/src/error.rs
@@ -1,8 +1,8 @@
use std::fmt::{Display, Formatter};

use datafusion_common::DataFusionError;
use datafusion_sql::sqlparser::parser::ParserError;
use datafusion_sql::sqlparser::tokenizer::TokenizerError;
use datafusion_sql::sqlparser::{parser::ParserError, tokenizer::TokenizerError};
use pyo3::PyErr;
use std::fmt::{Display, Formatter};

pub type Result<T> = std::result::Result<T, DaskPlannerError>;

31 changes: 21 additions & 10 deletions dask_planner/src/expression.rs
@@ -1,14 +1,25 @@
use crate::error::{DaskPlannerError, Result};
use crate::sql::exceptions::{py_runtime_err, py_type_err};
use crate::sql::logical;
use crate::sql::types::RexType;
use std::{convert::From, sync::Arc};

use arrow::datatypes::DataType;
use datafusion_common::{Column, DFField, DFSchema, ScalarValue};
use datafusion_expr::Operator;
use datafusion_expr::{lit, utils::exprlist_to_fields, BuiltinScalarFunction, Expr, LogicalPlan};
use datafusion_expr::{
lit,
utils::exprlist_to_fields,
BuiltinScalarFunction,
Expr,
LogicalPlan,
Operator,
};
use pyo3::prelude::*;
use std::convert::From;
use std::sync::Arc;

use crate::{
error::{DaskPlannerError, Result},
sql::{
exceptions::{py_runtime_err, py_type_err},
logical,
types::RexType,
},
};

/// An PyExpr that can be used on a DataFrame
#[pyclass(name = "Expression", module = "datafusion", subclass)]
@@ -805,11 +816,11 @@ pub fn expr_to_field(expr: &Expr, input_plan: &LogicalPlan) -> Result<DFField> {

#[cfg(test)]
mod test {
use crate::error::Result;
use crate::expression::PyExpr;
use datafusion_common::{Column, ScalarValue};
use datafusion_expr::Expr;

use crate::{error::Result, expression::PyExpr};

#[test]
fn get_value_u32() -> Result<()> {
test_get_value(ScalarValue::UInt32(None))?;
13 changes: 7 additions & 6 deletions dask_planner/src/parser.rs
@@ -2,19 +2,20 @@
//!
//! Declares a SQL parser based on sqlparser that handles custom formats that we need.
use crate::sql::exceptions::py_type_err;
use crate::sql::types::SqlTypeName;
use pyo3::prelude::*;
use std::collections::VecDeque;

use crate::dialect::DaskDialect;
use crate::sql::parser_utils::DaskParserUtils;
use datafusion_sql::sqlparser::{
ast::{Expr, Ident, SelectItem, Statement as SQLStatement, UnaryOperator, Value},
dialect::{keywords::Keyword, Dialect},
parser::{Parser, ParserError},
tokenizer::{Token, Tokenizer},
};
use std::collections::VecDeque;
use pyo3::prelude::*;

use crate::{
dialect::DaskDialect,
sql::{exceptions::py_type_err, parser_utils::DaskParserUtils, types::SqlTypeName},
};

macro_rules! parser_err {
($MSG:expr) => {
(Diffs for the remaining changed files are not shown.)
