Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into parquet_parallel_…
Browse files Browse the repository at this point in the history
…scan
  • Loading branch information
korowa committed Jan 27, 2023
2 parents 44b4284 + ad3cc24 commit 659d9dc
Show file tree
Hide file tree
Showing 76 changed files with 1,613 additions and 975 deletions.
6 changes: 3 additions & 3 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-benchmarks"
description = "DataFusion Benchmarks"
version = "16.0.0"
version = "17.0.0"
edition = "2021"
authors = ["Apache Arrow <[email protected]>"]
homepage = "https://github.com/apache/arrow-datafusion"
Expand All @@ -34,7 +34,7 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
arrow = "31.0.0"
datafusion = { path = "../datafusion/core", version = "16.0.0", features = ["scheduler"] }
datafusion = { path = "../datafusion/core", version = "17.0.0", features = ["scheduler"] }
env_logger = "0.10"
futures = "0.3"
mimalloc = { version = "0.1", optional = true, default-features = false }
Expand All @@ -51,4 +51,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
tokio = { version = "^1.0", features = ["macros", "rt", "rt-multi-thread", "parking_lot"] }

[dev-dependencies]
datafusion-proto = { path = "../datafusion/proto", version = "16.0.0" }
datafusion-proto = { path = "../datafusion/proto", version = "17.0.0" }
16 changes: 8 additions & 8 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-cli"
description = "Command Line Client for DataFusion query engine."
version = "16.0.0"
version = "17.0.0"
authors = ["Apache Arrow <[email protected]>"]
edition = "2021"
keywords = [ "arrow", "datafusion", "query", "sql" ]
Expand All @@ -32,7 +32,7 @@ readme = "README.md"
arrow = "31.0.0"
async-trait = "0.1.41"
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion/core", version = "16.0.0" }
datafusion = { path = "../datafusion/core", version = "17.0.0" }
dirs = "4.0.0"
env_logger = "0.9"
mimalloc = { version = "0.1", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-examples"
description = "DataFusion usage examples"
version = "16.0.0"
version = "17.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
authors = ["Apache Arrow <[email protected]>"]
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Run `git submodule update --init` to init test files.

- [`avro_sql.rs`](examples/avro_sql.rs): Build and run a query plan from a SQL statement against a local AVRO file
- [`csv_sql.rs`](examples/csv_sql.rs): Build and run a query plan from a SQL statement against a local CSV file
- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queris against a custom datasource (TableProvider)
- [`custom_datasource.rs`](examples/custom_datasource.rs): Run queries against a custom datasource (TableProvider)
- [`dataframe.rs`](examples/dataframe.rs): Run a query using a DataFrame against a local parquet file
- [`dataframe_in_memory.rs`](examples/dataframe_in_memory.rs): Run a query using a DataFrame against data in memory
- [`deserialize_to_struct.rs`](examples/deserialize_to_struct.rs): Convert query results into rust structs using serde
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/examples/avro_sql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ async fn main() -> Result<()> {
let testdata = datafusion::test_util::arrow_test_data();

// register avro file with the execution context
let avro_file = &format!("{}/avro/alltypes_plain.avro", testdata);
let avro_file = &format!("{testdata}/avro/alltypes_plain.avro");
ctx.register_avro("alltypes_plain", avro_file, AvroReadOptions::default())
.await?;

Expand Down
147 changes: 147 additions & 0 deletions datafusion/CHANGELOG.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion-common"
description = "Common functionality for DataFusion query engine"
version = "16.0.0"
version = "17.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "README.md"
Expand Down
3 changes: 1 addition & 2 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,7 @@ impl ConfigField for ConfigOptions {
"optimizer" => self.optimizer.set(rem, value),
"explain" => self.explain.set(rem, value),
_ => Err(DataFusionError::Internal(format!(
"Config value \"{}\" not found on ConfigOptions",
key
"Config value \"{key}\" not found on ConfigOptions"
))),
}
}
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ impl Display for DataFusionError {
}
#[cfg(feature = "avro")]
DataFusionError::AvroError(ref desc) => {
write!(f, "Avro error: {}", desc)
write!(f, "Avro error: {desc}")
}
DataFusionError::IoError(ref desc) => write!(f, "IO error: {desc}"),
DataFusionError::SQL(ref desc) => {
Expand Down
5 changes: 2 additions & 3 deletions datafusion/common/src/pyarrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,13 @@ mod tests {
let python_path: Vec<&str> =
locals.get_item("python_path").unwrap().extract().unwrap();

panic!("pyarrow not found\nExecutable: {}\nPython path: {:?}\n\
panic!("pyarrow not found\nExecutable: {executable}\nPython path: {python_path:?}\n\
HINT: try `pip install pyarrow`\n\
NOTE: On Mac OS, you must compile against a Framework Python \
(default in python.org installers and brew, but not pyenv)\n\
NOTE: On Mac OS, PYO3 might point to incorrect Python library \
path when using virtual environments. Try \
`export PYTHONPATH=$(python -c \"import sys; print(sys.path[-1])\")`\n",
executable, python_path)
`export PYTHONPATH=$(python -c \"import sys; print(sys.path[-1])\")`\n")
}
})
}
Expand Down
16 changes: 8 additions & 8 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
[package]
name = "datafusion"
description = "DataFusion is an in-memory query engine that uses Apache Arrow as the memory model"
version = "16.0.0"
version = "17.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "../../README.md"
Expand Down Expand Up @@ -67,13 +67,13 @@ bytes = "1.1"
bzip2 = { version = "0.4.3", optional = true }
chrono = { version = "0.4.23", default-features = false }
dashmap = "5.4.0"
datafusion-common = { path = "../common", version = "16.0.0", features = ["parquet", "object_store"] }
datafusion-expr = { path = "../expr", version = "16.0.0" }
datafusion-jit = { path = "../jit", version = "16.0.0", optional = true }
datafusion-optimizer = { path = "../optimizer", version = "16.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "16.0.0" }
datafusion-row = { path = "../row", version = "16.0.0" }
datafusion-sql = { path = "../sql", version = "16.0.0" }
datafusion-common = { path = "../common", version = "17.0.0", features = ["parquet", "object_store"] }
datafusion-expr = { path = "../expr", version = "17.0.0" }
datafusion-jit = { path = "../jit", version = "17.0.0", optional = true }
datafusion-optimizer = { path = "../optimizer", version = "17.0.0" }
datafusion-physical-expr = { path = "../physical-expr", version = "17.0.0" }
datafusion-row = { path = "../row", version = "17.0.0" }
datafusion-sql = { path = "../sql", version = "17.0.0" }
flate2 = { version = "1.0.24", optional = true }
futures = "0.3"
glob = "0.3.0"
Expand Down
30 changes: 11 additions & 19 deletions datafusion/core/src/avro_to_arrow/arrow_array_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,10 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
.map(|value| match value {
Ok(Value::Record(v)) => Ok(v),
Err(e) => Err(ArrowError::ParseError(format!(
"Failed to parse avro value: {:?}",
e
"Failed to parse avro value: {e:?}"
))),
other => Err(ArrowError::ParseError(format!(
"Row needs to be of type object, got: {:?}",
other
"Row needs to be of type object, got: {other:?}"
))),
})
.collect::<ArrowResult<Vec<Vec<(String, Value)>>>>()?;
Expand Down Expand Up @@ -237,8 +235,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
self.list_array_string_array_builder::<UInt64Type>(&dtype, col_name, rows)
}
ref e => Err(SchemaError(format!(
"Data type is currently not supported for dictionaries in list : {:?}",
e
"Data type is currently not supported for dictionaries in list : {e:?}"
))),
}
}
Expand All @@ -265,8 +262,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
}
e => {
return Err(SchemaError(format!(
"Nested list data builder type is not supported: {:?}",
e
"Nested list data builder type is not supported: {e:?}"
)))
}
};
Expand Down Expand Up @@ -331,8 +327,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
}
e => {
return Err(SchemaError(format!(
"Nested list data builder type is not supported: {:?}",
e
"Nested list data builder type is not supported: {e:?}"
)))
}
}
Expand Down Expand Up @@ -564,8 +559,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
}
datatype => {
return Err(ArrowError::SchemaError(format!(
"Nested list of {:?} not supported",
datatype
"Nested list of {datatype:?} not supported"
)));
}
};
Expand Down Expand Up @@ -673,8 +667,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
),
t => {
return Err(ArrowError::SchemaError(format!(
"TimeUnit {:?} not supported with Time64",
t
"TimeUnit {t:?} not supported with Time64"
)))
}
},
Expand All @@ -691,8 +684,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
),
t => {
return Err(ArrowError::SchemaError(format!(
"TimeUnit {:?} not supported with Time32",
t
"TimeUnit {t:?} not supported with Time32"
)))
}
},
Expand Down Expand Up @@ -755,7 +747,7 @@ impl<'a, R: Read> AvroArrowArrayReader<'a, R> {
bit_util::set_bit(null_buffer.as_slice_mut(), i);
value
} else {
panic!("expected struct got {:?}", v);
panic!("expected struct got {v:?}");
}
})
.collect::<Vec<&Vec<(String, Value)>>>();
Expand Down Expand Up @@ -877,7 +869,7 @@ fn resolve_string(v: &Value) -> ArrowResult<String> {
}
other => Err(AvroError::GetString(other.into())),
}
.map_err(|e| SchemaError(format!("expected resolvable string : {:?}", e)))
.map_err(|e| SchemaError(format!("expected resolvable string : {e:?}")))
}

fn resolve_u8(v: &Value) -> AvroResult<u8> {
Expand Down Expand Up @@ -983,7 +975,7 @@ mod test {

fn build_reader(name: &str, batch_size: usize) -> Reader<File> {
let testdata = crate::test_util::arrow_test_data();
let filename = format!("{}/avro/{}", testdata, name);
let filename = format!("{testdata}/avro/{name}");
let builder = ReaderBuilder::new()
.read_schema()
.with_batch_size(batch_size);
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/src/avro_to_arrow/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use crate::arrow::datatypes::SchemaRef;
use crate::arrow::record_batch::RecordBatch;
use crate::error::Result;
use arrow::error::Result as ArrowResult;
use std::io::{Read, Seek, SeekFrom};
use std::io::{Read, Seek};
use std::sync::Arc;

/// Avro file reader builder
Expand Down Expand Up @@ -112,7 +112,7 @@ impl ReaderBuilder {
Some(schema) => schema,
None => Arc::new(super::read_avro_schema_from_reader(&mut source)?),
};
source.seek(SeekFrom::Start(0))?;
source.rewind()?;
Reader::try_new(source, schema, self.batch_size, self.projection)
}
}
Expand Down Expand Up @@ -178,7 +178,7 @@ mod tests {

fn build_reader(name: &str) -> Reader<File> {
let testdata = crate::test_util::arrow_test_data();
let filename = format!("{}/avro/{}", testdata, name);
let filename = format!("{testdata}/avro/{name}");
let builder = ReaderBuilder::new().read_schema().with_batch_size(64);
builder.build(File::open(filename).unwrap()).unwrap()
}
Expand Down
6 changes: 3 additions & 3 deletions datafusion/core/src/avro_to_arrow/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -424,9 +424,9 @@ mod test {
} ]
}"#,
);
assert!(schema.is_ok(), "{:?}", schema);
assert!(schema.is_ok(), "{schema:?}");
let arrow_schema = to_arrow_schema(&schema.unwrap());
assert!(arrow_schema.is_ok(), "{:?}", arrow_schema);
assert!(arrow_schema.is_ok(), "{arrow_schema:?}");
let expected = Schema::new(vec![
Field::new("id", Int32, true),
Field::new("bool_col", Boolean, true),
Expand All @@ -446,7 +446,7 @@ mod test {
#[test]
fn test_non_record_schema() {
let arrow_schema = to_arrow_schema(&AvroSchema::String);
assert!(arrow_schema.is_ok(), "{:?}", arrow_schema);
assert!(arrow_schema.is_ok(), "{arrow_schema:?}");
assert_eq!(
arrow_schema.unwrap(),
Schema::new(vec![Field::new("", Utf8, false)])
Expand Down
Loading

0 comments on commit 659d9dc

Please sign in to comment.