diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 544aff445949..748ea39048cb 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -66,6 +66,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + [[package]] name = "arrayref" version = "0.3.7" @@ -289,6 +295,21 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "assert_cmd" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86d6b683edf8d1119fe420a94f8a7e389239666aa72e65495d91c00462510151" +dependencies = [ + "anstyle", + "bstr", + "doc-comment", + "predicates", + "predicates-core", + "predicates-tree", + "wait-timeout", +] + [[package]] name = "async-compression" version = "0.4.0" @@ -700,6 +721,18 @@ dependencies = [ "alloc-stdlib", ] +[[package]] +name = "bstr" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a246e68bb43f6cd9db24bea052a53e40405417c5fb372e3d1a8a7f770a564ef5" +dependencies = [ + "memchr", + "once_cell", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.13.0" @@ -959,6 +992,16 @@ dependencies = [ "memchr", ] +[[package]] +name = "ctor" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1586fa608b1dab41f667475b4a41faec5ba680aee428bfa5de4ea520fdc6e901" +dependencies = [ + "quote", + "syn 2.0.18", +] + [[package]] name = "dashmap" version = "5.4.0" @@ -1024,16 +1067,20 @@ name = "datafusion-cli" version = "26.0.0" dependencies = [ "arrow", + "assert_cmd", "async-trait", "aws-config", "aws-credential-types", "clap", + "ctor", "datafusion", "dirs", "env_logger", "mimalloc", "object_store", "parking_lot", + "predicates", + "rstest", "rustyline", "tokio", "url", @@ -1150,6 +1197,12 @@ dependencies = [ "sqlparser", ] 
+[[package]] +name = "difflib" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" + [[package]] name = "digest" version = "0.10.7" @@ -1319,6 +1372,15 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "float-cmp" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98de4bbd547a563b716d8dfa9aad1cb19bfab00f4fa09a6a4ed21dbcf44ce9c4" +dependencies = [ + "num-traits", +] + [[package]] name = "fnv" version = "1.0.7" @@ -1405,6 +1467,12 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +[[package]] +name = "futures-timer" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" + [[package]] name = "futures-util" version = "0.3.28" @@ -1965,6 +2033,12 @@ dependencies = [ "static_assertions", ] +[[package]] +name = "normalize-line-endings" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" + [[package]] name = "num" version = "0.4.0" @@ -2285,6 +2359,37 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +[[package]] +name = "predicates" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09963355b9f467184c04017ced4a2ba2d75cbcb4e7462690d388233253d4b1a9" +dependencies = [ + "anstyle", + "difflib", + "float-cmp", + "itertools 0.10.5", + "normalize-line-endings", + "predicates-core", + "regex", +] + +[[package]] +name = "predicates-core" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b794032607612e7abeb4db69adb4e33590fa6cf1149e95fd7cb00e634b92f174" + +[[package]] +name = "predicates-tree" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368ba315fb8c5052ab692e68a0eefec6ec57b23a36959c14496f0b0df2c0cecf" +dependencies = [ + "predicates-core", + "termtree", +] + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -2423,6 +2528,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.7.2" @@ -2485,6 +2596,32 @@ dependencies = [ "winapi", ] +[[package]] +name = "rstest" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de1bb486a691878cd320c2f0d319ba91eeaa2e894066d8b5f8f117c000e9d962" +dependencies = [ + "futures", + "futures-timer", + "rstest_macros", + "rustc_version", +] + +[[package]] +name = "rstest_macros" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290ca1a1c8ca7edb7c3283bd44dc35dd54fdec6253a3912e201ba1072018fca8" +dependencies = [ + "cfg-if", + "proc-macro2", + "quote", + "rustc_version", + "syn 1.0.109", + "unicode-ident", +] + [[package]] name = "rustc_version" version = "0.4.0" @@ -2917,6 +3054,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "termtree" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" + [[package]] name = "textwrap" version = "0.16.0" @@ -3245,6 +3388,15 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c3082ca00d5a5ef149bb8b555a72ae84c9c59f7250f013ac822ac2e49b19c64" +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + [[package]] name = "walkdir" version = "2.3.3" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 43367eceb33b..bb55a764d913 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -43,3 +43,9 @@ parking_lot = { version = "0.12" } rustyline = "11.0" tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } url = "2.2" + +[dev-dependencies] +assert_cmd = "2.0" +ctor = "0.2.0" +predicates = "3.0" +rstest = "0.17" diff --git a/datafusion-cli/src/exec.rs b/datafusion-cli/src/exec.rs index 1ccbfef982fa..ec7330f6eb9e 100644 --- a/datafusion-cli/src/exec.rs +++ b/datafusion-cli/src/exec.rs @@ -41,6 +41,20 @@ use std::time::Instant; use std::{fs::File, sync::Arc}; use url::Url; +/// run and execute the given SQL statements and commands in order, against a context with the given print options; an error from one command is reported on stderr and the remaining commands still run +pub async fn exec_from_commands( + ctx: &mut SessionContext, + print_options: &PrintOptions, + commands: Vec<String>, +) { + for sql in commands { + match exec_and_print(ctx, print_options, sql).await { + Ok(_) => {} + Err(err) => eprintln!("{err}"), + } + } +} + /// run and execute SQL statements and commands from a file, against a context with the given print options pub async fn exec_from_lines( ctx: &mut SessionContext, @@ -58,11 +72,8 @@ pub async fn exec_from_lines( let line = line.trim_end(); query.push_str(line); if line.ends_with(';') { - match unescape_input(line) { - Ok(sql) => match exec_and_print(ctx, print_options, sql).await { - Ok(_) => {} - Err(err) => eprintln!("{err}"), - }, + match exec_and_print(ctx, print_options, query).await { + Ok(_) => {} Err(err) => eprintln!("{err}"), } query = "".to_owned(); @@ -149,11 +160,8 @@ pub async fn 
exec_from_repl( } Ok(line) => { rl.add_history_entry(line.trim_end())?; - match unescape_input(&line) { - Ok(sql) 
=> match exec_and_print(ctx, &print_options, sql).await { - Ok(_) => {} - Err(err) => eprintln!("{err}"), - }, + match exec_and_print(ctx, &print_options, line).await { + Ok(_) => {} Err(err) => eprintln!("{err}"), } } @@ -182,6 +190,7 @@ async fn exec_and_print( ) -> Result<()> { let now = Instant::now(); + let sql = unescape_input(&sql)?; let plan = ctx.state().create_logical_plan(&sql).await?; let df = match &plan { LogicalPlan::Ddl(DdlStatement::CreateExternalTable(cmd)) => { diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 4c1dd2f94e05..aea499d60323 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -44,13 +44,21 @@ struct Args { data_path: Option, #[clap( - short = 'c', + short = 'b', long, help = "The batch size of each query, or use DataFusion default", validator(is_valid_batch_size) )] batch_size: Option, + #[clap( + short = 'c', + long, + multiple_values = true, + help = "Execute the given command string(s), then exit" + )] + command: Vec, + #[clap( short, long, @@ -116,6 +124,7 @@ pub async fn main() -> Result<()> { quiet: args.quiet, }; + let commands = args.command; let files = args.file; let rc = match args.rc { Some(file) => file, @@ -132,18 +141,25 @@ pub async fn main() -> Result<()> { } }; - if !files.is_empty() { - exec::exec_from_files(files, &mut ctx, &print_options).await; - Ok(()) - } else { + if commands.is_empty() && files.is_empty() { if !rc.is_empty() { exec::exec_from_files(rc, &mut ctx, &print_options).await } // TODO maybe we can have thiserror for cli but for now let's keep it simple - exec::exec_from_repl(&mut ctx, &mut print_options) + return exec::exec_from_repl(&mut ctx, &mut print_options) .await - .map_err(|e| DataFusionError::External(Box::new(e))) + .map_err(|e| DataFusionError::External(Box::new(e))); } + + if !files.is_empty() { + exec::exec_from_files(files, &mut ctx, &print_options).await; + } + + if !commands.is_empty() { + exec::exec_from_commands(&mut ctx, 
&print_options, commands).await; + } + + Ok(()) } fn create_runtime_env() -> Result { diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs new file mode 100644 index 000000000000..c6bee274e93c --- /dev/null +++ b/datafusion-cli/tests/cli_integration.rs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use std::process::Command; + +use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; +use predicates::prelude::predicate; +use rstest::rstest; + +#[cfg(test)] +#[ctor::ctor] +fn init() { + // Enable RUST_LOG logging configuration for tests; the result of try_init is deliberately discarded + let _ = env_logger::try_init(); +} + +#[rstest] +#[case::exec_from_commands( + ["--command", "select 1", "--format", "json", "-q"], + "[{\"Int64(1)\":1}]\n" +)] +#[case::exec_from_files( + ["--file", "tests/data/sql.txt", "--format", "json", "-q"], + "[{\"Int64(1)\":1}]\n" +)] +#[case::set_batch_size( + ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], + "[{\"name\":\"datafusion.execution.batch_size\",\"setting\":\"1\"}]\n" +)] +#[test] +fn cli_quick_test<'a>( + #[case] args: impl IntoIterator<Item = &'a str>, + #[case] expected: &str, +) { + let mut cmd = Command::cargo_bin("datafusion-cli").unwrap(); + cmd.args(args); + cmd.assert().success().stdout(predicate::eq(expected)); +} diff --git a/datafusion-cli/tests/data/sql.txt b/datafusion-cli/tests/data/sql.txt new file mode 100644 index 000000000000..9e13a3eff4a7 --- /dev/null +++ b/datafusion-cli/tests/data/sql.txt @@ -0,0 +1 @@ +select 1; \ No newline at end of file diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index e02ecc93b884..6e859351156f 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -115,6 +115,7 @@ python/requirements*.txt benchmarks/queries/* benchmarks/expected-plans/* benchmarks/data/* +datafusion-cli/tests/data/* ci/* **/*.svg **/*.csv