diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 87ef19773fda4..bdb0fdbaee371 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -260,6 +260,7 @@ dependencies = [ "iana-time-zone", "num-integer", "num-traits", + "serde", "winapi", ] @@ -462,8 +463,10 @@ dependencies = [ "dirs", "env_logger", "mimalloc", + "object_store", "rustyline", "tokio", + "url", ] [[package]] @@ -608,6 +611,15 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797" +[[package]] +name = "encoding_rs" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +dependencies = [ + "cfg-if", +] + [[package]] name = "endian-type" version = "0.1.2" @@ -699,6 +711,12 @@ dependencies = [ "miniz_oxide", ] +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "form_urlencoded" version = "1.1.0" @@ -824,6 +842,25 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" +[[package]] +name = "h2" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca32592cf21ac7ccab1825cd87f6c9b3d9022c44d086172ed0966bec8af30be" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "2.1.0" @@ -858,22 +895,92 @@ dependencies = [ "libc", ] +[[package]] +name = "http" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +dependencies = [ + "bytes", + "fnv", + "itoa 1.0.3", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "0.14.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02c929dc5c39e335a03c405292728118860721b10190d98c2a0f0efd5baafbac" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa 1.0.3", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-rustls" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d87c48c02e0dc5e3b849a2041db3029fd066650f8f717c07bf8ed78ccb895cac" +dependencies = [ + "http", + "hyper", + "rustls", + "tokio", + "tokio-rustls", +] + [[package]] name = "iana-time-zone" -version = "0.1.48" +version = "0.1.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "237a0714f28b1ee39ccec0770ccb544eb02c9ef2c82bb096230eefcffa6468b0" +checksum = "3bbaead50122b06e9a973ac20bc7445074d99ad9a0a0654934876908a9cec82c" dependencies = [ "android_system_properties", "core-foundation-sys", "js-sys", - "once_cell", "wasm-bindgen", "winapi", ] @@ -919,6 +1026,12 @@ version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ea37f355c05dde75b84bba2d767906ad522e97cd9e2eef2be7a4ab7fb442c06" +[[package]] +name = "ipnet" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" + [[package]] name = "itertools" version = "0.10.5" @@ -942,9 +1055,9 @@ checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" [[package]] name = "jobserver" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b" dependencies = [ "libc", ] @@ -1118,6 +1231,12 @@ dependencies = [ "libmimalloc-sys", ] +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + [[package]] name = "miniz_oxide" version = "0.5.4" @@ -1127,6 +1246,18 @@ dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys", +] + [[package]] name = "multiversion" version = "0.6.1" @@ -1261,12 +1392,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2168fee79ee3e7695905bc3a48777d807f82d956f821186fa7a2601c1295a73e" dependencies = [ "async-trait", + "base64", "bytes", "chrono", "futures", "itertools", "parking_lot", "percent-encoding", + "quick-xml", + "rand", + "reqwest", + "ring", + "rustls-pemfile", + "serde", + "serde_json", "snafu", "tokio", "tracing", @@ -1422,6 +1561,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-xml" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37dddbbe9df96afafcb8027fcf263971b726530e12f0787f620a7ba5b4846081" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "quote" version = "1.0.21" @@ -1523,6 +1672,61 @@ dependencies = [ "winapi", ] +[[package]] +name = "reqwest" +version = "0.11.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "431949c384f4e2ae07605ccaa56d1d9d2ecdb5cadd4f9577ccfab29f2e5149fc" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-rustls", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-pemfile", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-rustls", + "tokio-util", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "webpki-roots", + "winreg", +] + +[[package]] +name = "ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + [[package]] name = "rustix" version = "0.35.10" @@ -1537,6 +1741,27 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "rustls" +version = "0.20.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +dependencies = [ + "log", + "ring", + "sct", + "webpki", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0864aeff53f8c05aa08d86e5ef839d3dfcf07aeba2db32f12db0ef716e87bd55" +dependencies = [ + "base64", +] + [[package]] name = "rustversion" version = "1.0.9" @@ -1587,6 +1812,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "seq-macro" version = "0.3.1" @@ -1595,18 +1830,18 @@ checksum = "0772c5c30e1a0d91f6834f8e545c69281c099dfa9a3ac58d96a9fd629c8d4898" [[package]] name = "serde" -version = "1.0.144" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" +checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.144" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" +checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" dependencies = [ "proc-macro2", "quote", @@ -1624,6 +1859,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa 1.0.3", + "ryu", + "serde", +] + [[package]] name = "sha2" version = "0.10.6" @@ -1678,6 +1925,22 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" +[[package]] +name = "socket2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02e2d2db9033d13a1567121ddd7a095ee144db4e1ca1b1bda3419bc0da294ebd" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + [[package]] name = "sqlparser" version = "0.23.0" @@ -1833,12 +2096,16 @@ checksum = "0020c875007ad96677dcc890298f4b942882c5d4eb7cc8f439fc3bf813dc9c95" dependencies = [ "autocfg", "bytes", + "libc", "memchr", + "mio", "num_cpus", "once_cell", "parking_lot", "pin-project-lite", + "socket2", "tokio-macros", + "winapi", ] [[package]] @@ -1852,6 +2119,17 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls", + "tokio", + "webpki", +] + [[package]] name = "tokio-stream" version = "0.1.10" @@ -1863,6 +2141,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb2e075f03b3d66d8d8785356224ba688d2906a371015e225beeb65ca92c740" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + [[package]] name = "tracing" version = "0.1.36" @@ -1895,6 +2193,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + [[package]] name = "typenum" version = "1.15.0" @@ -1934,6 +2238,12 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "url" version = "2.3.1" @@ -1977,6 +2287,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -2008,6 +2328,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23639446165ca5a5de86ae1d8896b737ae80319560fbaa4c2887b7da6e7ebd7d" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.83" @@ -2037,6 +2369,35 @@ version = "0.2.83" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c38c045535d93ec4f0b4defec448e4291638ee608530863b1e2ba115d4fff7f" +[[package]] +name = "web-sys" +version = "0.3.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcda906d8be16e728fd5adc5b729afad4e444e106ab28cd1c7256e54fa61510f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "webpki" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1c760f0d366a6c24a02ed7816e23e691f5d92291f94d15e836006fd11b04daf" +dependencies = [ + "webpki", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2111,6 +2472,15 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + [[package]] name = "zstd" version = "0.11.2+zstd.1.5.2" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 2c70224ec61dc..e74b81a36baf7 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -35,5 +35,7 @@ datafusion = { path = "../datafusion/core", version = "12.0.0" } dirs = "4.0.0" env_logger = "0.9" mimalloc = { version = "0.1", default-features = false } +object_store = { version = "0.5.0", features = ["aws", "gcp"] } rustyline = "10.0" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } +url = "2.2" diff --git a/datafusion-cli/src/lib.rs b/datafusion-cli/src/lib.rs index 743556533b4b2..44d8f06107f94 100644 --- a/datafusion-cli/src/lib.rs +++ b/datafusion-cli/src/lib.rs @@ -22,5 +22,6 @@ pub mod command; pub mod exec; pub mod functions; pub mod helper; +pub mod object_storage; pub mod print_format; pub mod print_options; diff --git a/datafusion-cli/src/main.rs b/datafusion-cli/src/main.rs index 98ae274cabecb..cbb74bbf9b4ce 100644 --- a/datafusion-cli/src/main.rs +++ b/datafusion-cli/src/main.rs @@ -16,15 +16,19 @@ // under the License. use clap::Parser; +use datafusion::datasource::object_store::ObjectStoreRegistry; use datafusion::error::{DataFusionError, Result}; use datafusion::execution::context::SessionConfig; +use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv}; use datafusion::prelude::SessionContext; +use datafusion_cli::object_storage::DatafusionCliObjectStoreProvider; use datafusion_cli::{ exec, print_format::PrintFormat, print_options::PrintOptions, DATAFUSION_CLI_VERSION, }; use mimalloc::MiMalloc; use std::env; use std::path::Path; +use std::sync::Arc; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; @@ -98,7 +102,9 @@ pub async fn main() -> Result<()> { session_config = session_config.with_batch_size(batch_size); }; - let mut ctx = SessionContext::with_config(session_config.clone()); + let runtime_env = create_runtime_env()?; + let mut ctx = + SessionContext::with_config_rt(session_config.clone(), Arc::new(runtime_env)); let mut print_options = PrintOptions { format: args.format, @@ -120,6 +126,7 @@ pub async fn main() -> Result<()> { files } }; + if !files.is_empty() { exec::exec_from_files(files, &mut ctx, &print_options).await; Ok(()) @@ -134,6 +141,15 @@ pub async fn main() -> Result<()> { } } +fn create_runtime_env() -> Result { + let object_store_provider = DatafusionCliObjectStoreProvider {}; + let object_store_registry = + ObjectStoreRegistry::new_with_provider(Some(Arc::new(object_store_provider))); + let rn_config = + RuntimeConfig::new().with_object_store_registry(Arc::new(object_store_registry)); + return RuntimeEnv::new(rn_config); +} + fn is_valid_file(dir: &str) -> std::result::Result<(), String> { if Path::new(dir).is_file() { Ok(()) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs new file mode 100644 index 0000000000000..66ae4c0542ad5 --- /dev/null +++ b/datafusion-cli/src/object_storage.rs @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion::error::Result; +use std::{env, str::FromStr, sync::Arc}; + +use datafusion::{datasource::object_store::ObjectStoreProvider, error::DataFusionError}; +use object_store::{aws::AmazonS3Builder, gcp::GoogleCloudStorageBuilder}; +use url::Url; + +#[derive(Debug, PartialEq, Eq, clap::ArgEnum, Clone)] +pub enum ObjectStoreRegistrationOptions { + S3, + GCS, +} + +impl FromStr for ObjectStoreRegistrationOptions { + type Err = DataFusionError; + + fn from_str(input: &str) -> Result { + match input { + "s3" => Ok(ObjectStoreRegistrationOptions::S3), + "gcs" => Ok(ObjectStoreRegistrationOptions::GCS), + _ => Err(DataFusionError::Execution(format!( + "Unsupported object store scheme {}", + input + ))), + } + } +} + +#[derive(Debug)] +pub struct DatafusionCliObjectStoreProvider {} + +/// ObjectStoreProvider for S3 and GCS +impl ObjectStoreProvider for DatafusionCliObjectStoreProvider { + fn get_by_url(&self, url: &Url) -> Result> { + ObjectStoreRegistrationOptions::from_str(url.scheme()).map( + |scheme| match scheme { + ObjectStoreRegistrationOptions::S3 => build_s3_object_store(url), + ObjectStoreRegistrationOptions::GCS => build_gcs_object_store(url), + }, + )? + } +} + +fn build_s3_object_store(url: &Url) -> Result> { + let host = get_host_name(url)?; + match AmazonS3Builder::from_env().with_bucket_name(host).build() { + Ok(s3) => Ok(Arc::new(s3)), + Err(err) => Err(DataFusionError::Execution(err.to_string())), + } +} + +fn build_gcs_object_store(url: &Url) -> Result> { + let host = get_host_name(url)?; + let mut builder = GoogleCloudStorageBuilder::new().with_bucket_name(host); + + if let Some(path) = env::var("GCP_SERVICE_ACCOUNT_PATH").ok() { + builder = builder.with_service_account_path(path); + } + match builder.build() { + Ok(gcs) => Ok(Arc::new(gcs)), + Err(err) => Err(DataFusionError::Execution(err.to_string())), + } +} + +fn get_host_name(url: &Url) -> Result<&str> { + url.host_str().ok_or(DataFusionError::Execution(format!( + "Not able to parse hostname from url, {}", + url.as_str() + ))) +} + +#[cfg(test)] +mod tests { + use std::{env, str::FromStr}; + + use datafusion::datasource::object_store::ObjectStoreProvider; + use url::Url; + + use super::DatafusionCliObjectStoreProvider; + + #[test] + fn s3_provider_no_host() { + let no_host_url = "s3:///"; + let provider = DatafusionCliObjectStoreProvider {}; + let err = provider + .get_by_url(&Url::from_str(no_host_url).unwrap()) + .unwrap_err(); + assert!(err + .to_string() + .contains("Not able to parse hostname from url")) + } + + #[test] + fn gcs_provider_no_host() { + let no_host_url = "gcs:///"; + let provider = DatafusionCliObjectStoreProvider {}; + let err = provider + .get_by_url(&Url::from_str(no_host_url).unwrap()) + .unwrap_err(); + assert!(err + .to_string() + .contains("Not able to parse hostname from url")) + } + + #[test] + fn unknown_object_store_type() { + let unknown = "unknown://bucket_name/path"; + let provider = DatafusionCliObjectStoreProvider {}; + let err = provider + .get_by_url(&Url::from_str(unknown).unwrap()) + .unwrap_err(); + assert!(err + .to_string() + .contains("Unsupported object store scheme unknown")) + } + + #[test] + fn s3_build_error() { + let s3 = "s3://bucket_name/path"; + let provider = DatafusionCliObjectStoreProvider {}; + let err = provider + .get_by_url(&Url::from_str(s3).unwrap()) + .unwrap_err(); + assert!(err.to_string().contains("Generic S3 error: Missing region")); + } + + #[test] + fn s3_success() { + let s3 = "s3://bucket_name/path"; + env::set_var("AWS_DEFAULT_REGION", "us-east-1"); + let provider = DatafusionCliObjectStoreProvider {}; + assert!(provider.get_by_url(&Url::from_str(s3).unwrap()).is_ok()); + env::remove_var("AWS_DEFAULT_REGION"); + } +}