-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extract Listing URI logic into ListingTableUri structure #2578
Changes from 3 commits
4314a3b
7dc7eef
18ceed5
9c8b546
230e42b
deef0b6
6c311ba
9c9b4b2
63df81c
938e916
c1cf2da
c32e27e
61d67cf
a504fe8
0ec7433
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -67,7 +67,9 @@ datafusion-physical-expr = { path = "../physical-expr", version = "8.0.0" } | |
datafusion-row = { path = "../row", version = "8.0.0" } | ||
datafusion-sql = { path = "../sql", version = "8.0.0" } | ||
futures = "0.3" | ||
glob = "0.3.0" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is moved from data_access |
||
hashbrown = { version = "0.12", features = ["raw"] } | ||
itertools = "0.10" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is technically a new dependency of this crate, but it is so ubiquitous and is a dependency of prost, etc., so this is probably ok There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But datafusion doesn't depend on prost, right? If we could avoid another dependency that would be good in my opinion There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Well once we bring in the new object_store we will depend on it... Ultimately it is such a fundamental crate, like bytes, parking_lot, etc... I'm inclined to think it isn't really a problem... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. ok, I still cringe a little, but I vote |
||
lazy_static = { version = "^1.4.0" } | ||
log = "^0.4" | ||
num-traits = { version = "0.2", optional = true } | ||
|
@@ -85,6 +87,7 @@ sqlparser = "0.17" | |
tempfile = "3" | ||
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } | ||
tokio-stream = "0.1" | ||
url = "2.2" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a new dependency; it is brought in by hyper and friends, so again it is pretty ubiquitous |
||
uuid = { version = "1.0", features = ["v4"] } | ||
|
||
[dev-dependencies] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,7 +23,7 @@ use std::any::Any; | |
use std::collections::HashMap; | ||
use std::sync::Arc; | ||
|
||
use crate::datasource::listing::{ListingTable, ListingTableConfig}; | ||
use crate::datasource::listing::{ListingTable, ListingTableConfig, ListingTableUrl}; | ||
use crate::datasource::object_store_registry::ObjectStoreRegistry; | ||
use crate::datasource::TableProvider; | ||
use crate::error::{DataFusionError, Result}; | ||
|
@@ -157,10 +157,7 @@ impl ObjectStoreSchemaProvider { | |
} | ||
|
||
/// Retrieves an `ObjectStore` instance by scheme | ||
pub fn object_store<'a>( | ||
&self, | ||
uri: &'a str, | ||
) -> Result<(Arc<dyn ObjectStore>, &'a str)> { | ||
pub fn object_store(&self, uri: &ListingTableUrl) -> Result<Arc<dyn ObjectStore>> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The scheme stripping logic is now part of ListingTableUrl |
||
self.object_store_registry | ||
.lock() | ||
.get_by_uri(uri) | ||
|
@@ -173,13 +170,13 @@ impl ObjectStoreSchemaProvider { | |
pub async fn register_listing_table( | ||
&self, | ||
name: &str, | ||
uri: &str, | ||
uri: ListingTableUrl, | ||
config: Option<ListingTableConfig>, | ||
) -> Result<()> { | ||
let config = match config { | ||
Some(cfg) => cfg, | ||
None => { | ||
let (object_store, _path) = self.object_store(uri)?; | ||
let object_store = self.object_store(&uri)?; | ||
ListingTableConfig::new(object_store, uri).infer().await? | ||
} | ||
}; | ||
|
@@ -255,6 +252,7 @@ mod tests { | |
use crate::datasource::empty::EmptyTable; | ||
use crate::execution::context::SessionContext; | ||
|
||
use crate::datasource::listing::ListingTableUrl; | ||
use futures::StreamExt; | ||
|
||
#[tokio::test] | ||
|
@@ -280,12 +278,13 @@ mod tests { | |
async fn test_schema_register_listing_table() { | ||
let testdata = crate::test_util::parquet_test_data(); | ||
let filename = format!("{}/{}", testdata, "alltypes_plain.parquet"); | ||
let uri = ListingTableUrl::parse(filename).unwrap(); | ||
|
||
let schema = ObjectStoreSchemaProvider::new(); | ||
let _store = schema.register_object_store("test", Arc::new(LocalFileSystem {})); | ||
|
||
schema | ||
.register_listing_table("alltypes_plain", &filename, None) | ||
.register_listing_table("alltypes_plain", uri, None) | ||
.await | ||
.unwrap(); | ||
|
||
|
@@ -338,8 +337,9 @@ mod tests { | |
|| file == OsStr::new("alltypes_plain.parquet") | ||
{ | ||
let name = path.file_stem().unwrap().to_str().unwrap(); | ||
let path = ListingTableUrl::parse(&sized_file.path).unwrap(); | ||
schema | ||
.register_listing_table(name, &sized_file.path, None) | ||
.register_listing_table(name, path, None) | ||
.await | ||
.unwrap(); | ||
} | ||
|
@@ -360,17 +360,18 @@ mod tests { | |
async fn test_schema_register_same_listing_table() { | ||
let testdata = crate::test_util::parquet_test_data(); | ||
let filename = format!("{}/{}", testdata, "alltypes_plain.parquet"); | ||
let uri = ListingTableUrl::parse(filename).unwrap(); | ||
|
||
let schema = ObjectStoreSchemaProvider::new(); | ||
let _store = schema.register_object_store("test", Arc::new(LocalFileSystem {})); | ||
|
||
schema | ||
.register_listing_table("alltypes_plain", &filename, None) | ||
.register_listing_table("alltypes_plain", uri.clone(), None) | ||
.await | ||
.unwrap(); | ||
|
||
schema | ||
.register_listing_table("alltypes_plain", &filename, None) | ||
.register_listing_table("alltypes_plain", uri, None) | ||
.await | ||
.unwrap(); | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it is time to move `datasource` to its own crate, if possible 🤔