Skip to content

Commit

Permalink
add csv mode to datafusion cli (#281)
Browse files Browse the repository at this point in the history
  • Loading branch information
jimexist authored May 24, 2021
1 parent eeb69af commit 68ad990
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 32 deletions.
90 changes: 72 additions & 18 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ on:
pull_request:

jobs:

# build the library, a compilation step used by multiple steps below
linux-build-lib:
name: Build Libraries on AMD64 Rust ${{ matrix.rust }}
Expand Down Expand Up @@ -61,17 +60,19 @@ jobs:
rustup component add rustfmt
- name: Build Workspace
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
cargo build
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# Ballista is currently not part of the main workspace so requires a separate build step
- name: Build Ballista
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
cd ballista/rust
# snmalloc requires cmake so build without default features
cargo build --no-default-features
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"

# test the crate
linux-test:
Expand Down Expand Up @@ -111,8 +112,6 @@ jobs:
rustup component add rustfmt
- name: Run tests
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# run tests on all workspace members with default feature list
Expand All @@ -122,16 +121,69 @@ jobs:
cargo test --no-default-features
cargo run --example csv_sql
cargo run --example parquet_sql
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
# Ballista is currently not part of the main workspace so requires a separate test step
- name: Run Ballista tests
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cd ballista/rust
# snmalloc requires cmake so build without default features
cargo test --no-default-features
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"

integration-test:
name: "Integration Test"
needs: [linux-build-lib]
runs-on: ubuntu-latest
services:
postgres:
image: postgres:13
env:
POSTGRES_PASSWORD: postgres
POSTGRES_DB: db_test
ports:
- 5432/tcp
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: "3.8"
- name: Install Python dependencies
run: |
python -m pip install --upgrade pip setuptools wheel
python -m pip install --upgrade numpy==1.20.3 pandas==1.2.4
- name: Allow access of psql
run: |
# make sure psql can access the server
echo "$POSTGRES_HOST:$POSTGRES_PORT:$POSTGRES_DB:$POSTGRES_USER:$POSTGRES_PASSWORD" | tee ~/.pgpass
chmod 0600 ~/.pgpass
psql -d "$POSTGRES_DB" -h "$POSTGRES_HOST" -p "$POSTGRES_PORT" -U "$POSTGRES_USER" -c 'select now() as now'
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
POSTGRES_DB: db_test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
- name: Build datafusion-cli
run: cargo build --bin datafusion-cli
- name: Test Psql Parity
run: python -m unittest -v integration-tests/test_psql_parity.py
env:
POSTGRES_HOST: localhost
POSTGRES_PORT: ${{ job.services.postgres.ports[5432] }}
POSTGRES_DB: db_test
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres

windows-and-macos:
name: Test on ${{ matrix.os }} Rust ${{ matrix.rust }}
Expand All @@ -156,9 +208,10 @@ jobs:
run: |
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# do not produce debug symbols to keep memory usage down
export RUSTFLAGS="-C debuginfo=0"
cargo test
env:
# do not produce debug symbols to keep memory usage down
RUSTFLAGS: "-C debuginfo=0"

lint:
name: Lint
Expand Down Expand Up @@ -212,9 +265,10 @@ jobs:
rustup component add rustfmt clippy
- name: Run clippy
run: |
export CARGO_HOME="/github/home/.cargo"
export CARGO_TARGET_DIR="/github/home/target"
cargo clippy --all-targets --workspace -- -D warnings
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"

miri-checks:
name: MIRI
Expand Down Expand Up @@ -242,9 +296,9 @@ jobs:
- name: Run Miri Checks
env:
RUST_BACKTRACE: full
RUST_LOG: 'trace'
RUST_LOG: "trace"
MIRIFLAGS: "-Zmiri-disable-isolation"
run: |
export MIRIFLAGS="-Zmiri-disable-isolation"
cargo miri setup
cargo clean
# Ignore MIRI errors until we can get a clean run
Expand Down Expand Up @@ -275,16 +329,16 @@ jobs:
key: ${{ runner.os }}-${{ matrix.arch }}-target-coverage-cache-${{ matrix.rust }}-
- name: Run coverage
run: |
export CARGO_HOME="/home/runner/.cargo"
export CARGO_TARGET_DIR="/home/runner/target"
export ARROW_TEST_DATA=$(pwd)/testing/data
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
# 2020-11-15: There is a cargo-tarpaulin regression in 0.17.0
# see https://github.com/xd009642/tarpaulin/issues/618
cargo install --version 0.16.0 cargo-tarpaulin
cargo tarpaulin --out Xml
env:
CARGO_HOME: "/home/runner/.cargo"
CARGO_TARGET_DIR: "/home/runner/target"
- name: Report coverage
continue-on-error: true
run: bash <(curl -s https://codecov.io/bash)
27 changes: 15 additions & 12 deletions datafusion-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@
use clap::{crate_version, App, Arg};
use datafusion::error::Result;
use datafusion::execution::context::{ExecutionConfig, ExecutionContext};
use datafusion_cli::{print_format::PrintFormat, PrintOptions};
use datafusion_cli::{
print_format::{all_print_formats, PrintFormat},
PrintOptions,
};
use rustyline::Editor;
use std::env;
use std::fs::File;
Expand Down Expand Up @@ -63,14 +66,22 @@ pub async fn main() {
)
.arg(
Arg::with_name("format")
.help("Output format (possible values: table, csv, tsv, json)")
.help("Output format")
.long("format")
.default_value("table")
.validator(is_valid_format)
.possible_values(
&all_print_formats()
.iter()
.map(|format| format.to_string())
.collect::<Vec<_>>()
.iter()
.map(|i| i.as_str())
.collect::<Vec<_>>(),
)
.takes_value(true),
)
.arg(
Arg::with_name("quite")
Arg::with_name("quiet")
.help("Reduce printing other than the results and work quietly")
.short("q")
.long("quiet")
Expand Down Expand Up @@ -189,14 +200,6 @@ async fn exec_from_repl(execution_config: ExecutionConfig, print_options: PrintO
rl.save_history(".history").ok();
}

fn is_valid_format(format: String) -> std::result::Result<(), String> {
if format.parse::<PrintFormat>().is_ok() {
Ok(())
} else {
Err(format!("Format '{}' not supported", format))
}
}

fn is_valid_file(dir: String) -> std::result::Result<(), String> {
if Path::new(&dir).is_file() {
Ok(())
Expand Down
30 changes: 30 additions & 0 deletions datafusion-cli/src/print_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use arrow::json::ArrayWriter;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::arrow::util::pretty;
use datafusion::error::{DataFusionError, Result};
use std::fmt;
use std::str::FromStr;

/// Allow records to be printed in different formats
Expand All @@ -32,6 +33,16 @@ pub enum PrintFormat {
Json,
}

/// returns all print formats
pub fn all_print_formats() -> Vec<PrintFormat> {
vec![
PrintFormat::Csv,
PrintFormat::Tsv,
PrintFormat::Table,
PrintFormat::Json,
]
}

impl FromStr for PrintFormat {
type Err = ();
fn from_str(s: &str) -> std::result::Result<Self, ()> {
Expand All @@ -45,6 +56,17 @@ impl FromStr for PrintFormat {
}
}

impl fmt::Display for PrintFormat {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Self::Csv => write!(f, "csv"),
Self::Tsv => write!(f, "tsv"),
Self::Table => write!(f, "table"),
Self::Json => write!(f, "json"),
}
}
}

fn print_batches_to_json(batches: &[RecordBatch]) -> Result<String> {
let mut bytes = vec![];
{
Expand Down Expand Up @@ -108,6 +130,14 @@ mod tests {
assert_eq!(PrintFormat::Table, format);
}

#[test]
fn test_to_str() {
assert_eq!("csv", PrintFormat::Csv.to_string());
assert_eq!("table", PrintFormat::Table.to_string());
assert_eq!("tsv", PrintFormat::Tsv.to_string());
assert_eq!("json", PrintFormat::Json.to_string());
}

#[test]
fn test_from_str_failure() {
assert_eq!(true, "pretty".parse::<PrintFormat>().is_err());
Expand Down
4 changes: 2 additions & 2 deletions datafusion/docs/cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ The DataFusion CLI is a command-line interactive SQL utility that allows queries

Use the following commands to clone this repository and run the CLI. This will require the Rust toolchain to be installed. Rust can be installed from [https://rustup.rs/](https://rustup.rs/).

```sh
```bash
git clone https://github.com/apache/arrow-datafusion
cd arrow-datafusion/datafusion-cli
cargo run --release
Expand All @@ -35,7 +35,7 @@ cargo run --release

Use the following commands to clone this repository and build a Docker image containing the CLI tool. Note that there is `.dockerignore` file in the root of the repository that may need to be deleted in order for this to work.

```sh
```bash
git clone https://github.com/apache/arrow-datafusion
cd arrow-datafusion
docker build -f datafusion-cli/Dockerfile . --tag datafusion-cli
Expand Down
15 changes: 15 additions & 0 deletions integration-tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at

# http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
22 changes: 22 additions & 0 deletions integration-tests/sqls/simple_math_expressions.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at

-- http://www.apache.org/licenses/LICENSE-2.0

-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

SELECT
abs(-1.1) as abs,
exp(2.0) as exp,
sin(3.0) as sin,
cos(4.0) as cos,
tan(5.0) as tan;
17 changes: 17 additions & 0 deletions integration-tests/sqls/simple_select.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at

-- http://www.apache.org/licenses/LICENSE-2.0

-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

SELECT 1 as num;
Loading

0 comments on commit 68ad990

Please sign in to comment.