Commit
Merge remote-tracking branch 'apache/main' into alamb/prepare_for_44
alamb committed Dec 24, 2024
2 parents 8d53dff + 3864b11 commit 5f7b916
Showing 159 changed files with 834 additions and 401 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
@@ -73,7 +73,7 @@ version = "44.0.0"
# selectively turn them on if needed, since we can override default-features = true (from false)
# for the inherited dependency but cannot do the reverse (override from true to false).
#
-# See for more detaiils: https://github.com/rust-lang/cargo/issues/11329
+# See for more details: https://github.com/rust-lang/cargo/issues/11329
ahash = { version = "0.8", default-features = false, features = [
"runtime-rng",
] }
2 changes: 1 addition & 1 deletion README.md
@@ -113,7 +113,7 @@ Default features:
 - `regex_expressions`: regular expression functions, such as `regexp_match`
 - `unicode_expressions`: Include unicode aware functions such as `character_length`
 - `unparser`: enables support to reverse LogicalPlans back into SQL
-- `recursive-protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.
+- `recursive_protection`: uses [recursive](https://docs.rs/recursive/latest/recursive/) for stack overflow protection.

Optional features:

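
For background on this feature: the [recursive](https://docs.rs/recursive/latest/recursive/) crate provides a `#[recursive::recursive]` attribute that grows the stack at runtime when it runs low, which is how `recursive_protection` guards deeply recursive plan traversals. A minimal sketch, assuming `recursive` as a dependency; the `Node` type and `tree_depth` function are hypothetical, not code from this commit:

```rust
// Hypothetical tree type used only for this sketch.
struct Node {
    children: Vec<Node>,
}

// With the attribute, each call checks the remaining stack space and, if it
// is low, continues the recursion on a freshly allocated stack segment.
#[recursive::recursive]
fn tree_depth(node: &Node) -> usize {
    1 + node.children.iter().map(tree_depth).max().unwrap_or(0)
}

fn main() {
    // A 100_000-deep chain would overflow a default thread stack without the
    // attribute; with it, the traversal should complete.
    let mut node = Node { children: vec![] };
    for _ in 0..100_000 {
        node = Node { children: vec![node] };
    }
    assert_eq!(tree_depth(&node), 100_001);
}
```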
1 change: 1 addition & 0 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions datafusion-cli/Cargo.toml
@@ -45,6 +45,7 @@ datafusion = { path = "../datafusion/core", version = "44.0.0", features = [
"datetime_expressions",
"encoding_expressions",
"parquet",
"recursive_protection",
"regex_expressions",
"unicode_expressions",
"compression",
2 changes: 1 addition & 1 deletion datafusion-cli/src/functions.rs
@@ -360,7 +360,7 @@ impl TableFunctionImpl for ParquetMetadataFunc {
Field::new("total_uncompressed_size", DataType::Int64, true),
]));

-// construct recordbatch from metadata
+// construct record batch from metadata
let mut filename_arr = vec![];
let mut row_group_id_arr = vec![];
let mut row_group_num_rows_arr = vec![];
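
The pattern the fixed comment refers to (collect metadata into plain `Vec`s, then assemble one `RecordBatch`) looks roughly like this sketch; the schema is a truncated, hypothetical version of the real `parquet_metadata` output columns:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int64Array, StringArray};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

fn metadata_to_batch(
    filename_arr: Vec<String>,
    row_group_id_arr: Vec<i64>,
) -> arrow::error::Result<RecordBatch> {
    // One Field per output column, matching the Vecs that were filled while
    // walking the parquet metadata.
    let schema = Arc::new(Schema::new(vec![
        Field::new("filename", DataType::Utf8, true),
        Field::new("row_group_id", DataType::Int64, true),
    ]));
    let columns: Vec<ArrayRef> = vec![
        Arc::new(StringArray::from(filename_arr)),
        Arc::new(Int64Array::from(row_group_id_arr)),
    ];
    RecordBatch::try_new(schema, columns)
}
```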
2 changes: 1 addition & 1 deletion datafusion-examples/README.md
@@ -22,7 +22,7 @@
This crate includes end to end, highly commented examples of how to use
various DataFusion APIs to help you get started.

-## Prerequisites:
+## Prerequisites

Run `git submodule update --init` to init test files.

4 changes: 2 additions & 2 deletions datafusion-examples/examples/advanced_parquet_index.rs
@@ -82,7 +82,7 @@ use url::Url;
/// Specifically, this example illustrates how to:
/// 1. Use [`ParquetFileReaderFactory`] to avoid re-reading parquet metadata on each query
/// 2. Use [`PruningPredicate`] for predicate analysis
-/// 3. Pass a row group selection to [`ParuetExec`]
+/// 3. Pass a row group selection to [`ParquetExec`]
/// 4. Pass a row selection (within a row group) to [`ParquetExec`]
///
/// Note this is a *VERY* low level example for people who want to build their
@@ -211,7 +211,7 @@ async fn main() -> Result<()> {
//
// Note: in order to prune pages, the Page Index must be loaded and the
// ParquetExec will load it on demand if not present. To avoid a second IO
-// during query, this example loaded the Page Index pre-emptively by setting
+// during query, this example loaded the Page Index preemptively by setting
// `ArrowReader::with_page_index` in `IndexedFile::try_new`
provider.set_use_row_selection(true);
println!("** Select data, predicate `id = 950`");
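
The "row selection (within a row group)" mentioned in the doc comment is expressed with the parquet crate's `RowSelection`/`RowSelector` types. A small sketch; the skip/select counts are made up:

```rust
use parquet::arrow::arrow_reader::{RowSelection, RowSelector};

fn main() {
    // Within a 1000-row row group: skip 200 rows, decode 100, skip the rest.
    let selection = RowSelection::from(vec![
        RowSelector::skip(200),
        RowSelector::select(100),
        RowSelector::skip(700),
    ]);
    // Only the selected rows are decoded when the selection is handed to an
    // arrow parquet reader builder via `with_row_selection`.
    assert_eq!(selection.row_count(), 100);
}
```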
4 changes: 2 additions & 2 deletions datafusion-examples/examples/analyzer_rule.rs
@@ -138,7 +138,7 @@ impl AnalyzerRule for RowLevelAccessControl {
fn analyze(&self, plan: LogicalPlan, _config: &ConfigOptions) -> Result<LogicalPlan> {
// use the TreeNode API to recursively walk the LogicalPlan tree
// and all of its children (inputs)
-let transfomed_plan = plan.transform(|plan| {
+let transformed_plan = plan.transform(|plan| {
// This closure is called for each LogicalPlan node
// if it is a Scan node, add a filter to remove all managers
if is_employee_table_scan(&plan) {
@@ -166,7 +166,7 @@ impl AnalyzerRule for RowLevelAccessControl {
//
// This example does not need the value of either flag, so simply
// extract the LogicalPlan "data"
-Ok(transfomed_plan.data)
+Ok(transformed_plan.data)
}

fn name(&self) -> &str {
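
For context, the `TreeNode::transform` pattern this rule uses looks roughly like the sketch below; the filter predicate and the `is_employee_table_scan` check are simplified stand-ins for the example's own helpers:

```rust
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::Result;
use datafusion_expr::{col, lit, LogicalPlan, LogicalPlanBuilder};

// Simplified stand-in for the example's helper.
fn is_employee_table_scan(plan: &LogicalPlan) -> bool {
    matches!(plan, LogicalPlan::TableScan(scan) if scan.table_name.table() == "employee")
}

fn add_row_filter(plan: LogicalPlan) -> Result<LogicalPlan> {
    // `transform` walks the plan bottom-up, handing each node to the closure.
    let transformed_plan = plan.transform(|plan| {
        if is_employee_table_scan(&plan) {
            // Wrap the scan in a filter and report that a rewrite happened.
            let filtered = LogicalPlanBuilder::from(plan)
                .filter(col("position").not_eq(lit("manager")))?
                .build()?;
            Ok(Transformed::yes(filtered))
        } else {
            // Leave every other node untouched.
            Ok(Transformed::no(plan))
        }
    })?;
    // `.data` is the rewritten plan; `.transformed` records whether any node changed.
    Ok(transformed_plan.data)
}
```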
10 changes: 5 additions & 5 deletions datafusion-examples/examples/catalog.rs
@@ -46,11 +46,11 @@ async fn main() -> Result<()> {

let ctx = SessionContext::new();
let state = ctx.state();
-let cataloglist = Arc::new(CustomCatalogProviderList::new());
+let catalog_list = Arc::new(CustomCatalogProviderList::new());

// use our custom catalog list for context. each context has a single catalog list.
// context will by default have [`MemoryCatalogProviderList`]
-ctx.register_catalog_list(cataloglist.clone());
+ctx.register_catalog_list(catalog_list.clone());

// initialize our catalog and schemas
let catalog = DirCatalog::new();
@@ -81,7 +81,7 @@ async fn main() -> Result<()> {
ctx.register_catalog("dircat", Arc::new(catalog));
{
// catalog was passed down into our custom catalog list since we override the ctx's default
-let catalogs = cataloglist.catalogs.read().unwrap();
+let catalogs = catalog_list.catalogs.read().unwrap();
assert!(catalogs.contains_key("dircat"));
};

@@ -144,8 +144,8 @@ impl DirSchema {
async fn create(state: &SessionState, opts: DirSchemaOpts<'_>) -> Result<Arc<Self>> {
let DirSchemaOpts { ext, dir, format } = opts;
let mut tables = HashMap::new();
-let direntries = std::fs::read_dir(dir).unwrap();
-for res in direntries {
+let dir_entries = std::fs::read_dir(dir).unwrap();
+for res in dir_entries {
let entry = res.unwrap();
let filename = entry.file_name().to_str().unwrap().to_string();
if !filename.ends_with(ext) {
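
As background for this example: every `SessionContext` starts with a default in-memory catalog list until `register_catalog_list` swaps in a custom one, as the diff above does. A quick sketch of inspecting the default:

```rust
use datafusion::prelude::SessionContext;

fn main() {
    let ctx = SessionContext::new();
    // The default MemoryCatalogProviderList exposes one catalog named
    // "datafusion", which in turn contains the "public" schema.
    let catalog = ctx.catalog("datafusion").expect("default catalog exists");
    assert!(catalog.schema("public").is_some());
}
```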
4 changes: 2 additions & 2 deletions datafusion-examples/examples/expr_api.rs
@@ -53,7 +53,7 @@ use datafusion_optimizer::analyzer::type_coercion::TypeCoercionRewriter;
/// 4. Simplify expressions: [`simplify_demo`]
/// 5. Analyze predicates for boundary ranges: [`range_analysis_demo`]
/// 6. Get the types of the expressions: [`expression_type_demo`]
-/// 7. Apply type cocercion to expressions: [`type_coercion_demo`]
+/// 7. Apply type coercion to expressions: [`type_coercion_demo`]
#[tokio::main]
async fn main() -> Result<()> {
// The easiest way to do create expressions is to use the
@@ -392,7 +392,7 @@ fn type_coercion_demo() -> Result<()> {
)?;
assert!(physical_expr.evaluate(&batch).is_ok());

-// 4. Apply explict type coercion by manually rewriting the expression
+// 4. Apply explicit type coercion by manually rewriting the expression
let coerced_expr = expr
.transform(|e| {
// Only type coerces binary expressions.
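
The "explicit type coercion" step being fixed here amounts to inserting casts by hand rather than relying on the analyzer. A minimal sketch; the column name and types are hypothetical:

```rust
use arrow::datatypes::DataType;
use datafusion_expr::{cast, col, lit};

fn main() {
    // Compare an Int32 column to an Int64 literal by casting the column
    // explicitly, so both sides of the comparison share a type.
    let expr = cast(col("small_int_col"), DataType::Int64).eq(lit(1i64));
    // Prints something like: CAST(small_int_col AS Int64) = Int64(1)
    println!("{expr}");
}
```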
2 changes: 1 addition & 1 deletion datafusion-examples/examples/function_factory.rs
@@ -36,7 +36,7 @@ use datafusion_expr::{
///
/// Apart from [FunctionFactory], this example covers
/// [ScalarUDFImpl::simplify()] which is often used at the same time, to replace
-/// a function call with another expression at rutime.
+/// a function call with another expression at runtime.
///
/// This example is rather simple and does not cover all cases required for a
/// real implementation.
2 changes: 1 addition & 1 deletion datafusion-examples/examples/memtable.rs
@@ -25,7 +25,7 @@ use std::sync::Arc;
use std::time::Duration;
use tokio::time::timeout;

-/// This example demonstrates executing a simple query against a Memtable
+/// This example demonstrates executing a simple query against a [`MemTable`]
#[tokio::main]
async fn main() -> Result<()> {
let mem_table = create_memtable()?;
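
A minimal, self-contained version of the pattern the example demonstrates; the schema and values here are made up:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array};
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::SessionContext;

#[tokio::main]
async fn main() -> Result<()> {
    // One partition containing a single three-row batch.
    let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int32, false)]));
    let ids: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3]));
    let batch = RecordBatch::try_new(schema.clone(), vec![ids])?;
    let mem_table = MemTable::try_new(schema, vec![vec![batch]])?;

    // Register the table and query it with SQL.
    let ctx = SessionContext::new();
    ctx.register_table("users", Arc::new(mem_table))?;
    ctx.sql("SELECT id FROM users").await?.show().await?;
    Ok(())
}
```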
2 changes: 1 addition & 1 deletion datafusion-examples/examples/optimizer_rule.rs
@@ -146,7 +146,7 @@ impl MyOptimizerRule {
// Closure called for each sub tree
match expr {
Expr::BinaryExpr(binary_expr) if is_binary_eq(&binary_expr) => {
-// destruture the expression
+// destructure the expression
let BinaryExpr { left, op: _, right } = binary_expr;
// rewrite to `my_eq(left, right)`
let udf = ScalarUDF::new_from_impl(MyEq::new());
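
The destructuring this comment describes, in isolation; a real rule would rebuild the pieces as a `my_eq(left, right)` UDF call, while this sketch just reassembles the expression unchanged:

```rust
use datafusion_expr::expr::BinaryExpr;
use datafusion_expr::{col, lit, Expr, Operator};

fn main() {
    let expr = col("person.name").eq(lit("Andy"));
    // Take the `left = right` expression apart...
    if let Expr::BinaryExpr(BinaryExpr { left, op: Operator::Eq, right }) = expr {
        // ...and put it back together (a rewrite rule would substitute a
        // ScalarUDF invocation here instead).
        let rebuilt = Expr::BinaryExpr(BinaryExpr::new(left, Operator::Eq, right));
        println!("{rebuilt}");
    }
}
```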
2 changes: 1 addition & 1 deletion datafusion-examples/examples/plan_to_sql.rs
@@ -65,7 +65,7 @@ fn simple_expr_to_sql_demo() -> Result<()> {
Ok(())
}

-/// DataFusioon can remove parentheses when converting an expression to SQL.
+/// DataFusion can remove parentheses when converting an expression to SQL.
/// Note that output is intended for humans, not for other SQL engines,
/// as difference in precedence rules can cause expressions to be parsed differently.
fn simple_expr_to_pretty_sql_demo() -> Result<()> {
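
For context, the round trip the example demonstrates looks like this sketch (it mirrors the expression used in `simple_expr_to_sql_demo` and requires the `unparser` feature noted in the README diff above):

```rust
use datafusion::error::Result;
use datafusion::prelude::*;
use datafusion::sql::unparser::expr_to_sql;

fn main() -> Result<()> {
    // Build an Expr with the API, then unparse it back to SQL text.
    let expr = col("a").lt(lit(5)).or(col("a").eq(lit(8)));
    let sql = expr_to_sql(&expr)?.to_string();
    // Prints something like: ((a < 5) OR (a = 8))
    println!("{sql}");
    Ok(())
}
```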
6 changes: 3 additions & 3 deletions datafusion-examples/examples/simple_udtf.rs
@@ -140,7 +140,7 @@ impl TableFunctionImpl for LocalCsvTableFunc {
let limit = exprs
.get(1)
.map(|expr| {
-// try to simpify the expression, so 1+2 becomes 3, for example
+// try to simplify the expression, so 1+2 becomes 3, for example
let execution_props = ExecutionProps::new();
let info = SimplifyContext::new(&execution_props);
let expr = ExprSimplifier::new(info).simplify(expr.clone())?;
@@ -173,8 +173,8 @@ fn read_csv_batches(csv_path: impl AsRef<Path>) -> Result<(SchemaRef, Vec<Record
.with_header(true)
.build(file)?;
let mut batches = vec![];
-for bacth in reader {
-batches.push(bacth?);
+for batch in reader {
+batches.push(batch?);
}
let schema = Arc::new(schema);
Ok((schema, batches))
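
The simplification step referenced by the fixed comment, in isolation; a sketch assuming the `datafusion-common`, `datafusion-expr`, and `datafusion-optimizer` crates:

```rust
use datafusion_common::Result;
use datafusion_expr::execution_props::ExecutionProps;
use datafusion_expr::lit;
use datafusion_expr::simplify::SimplifyContext;
use datafusion_optimizer::simplify_expressions::ExprSimplifier;

fn main() -> Result<()> {
    // Constant-fold `1 + 2` the same way the table function does before
    // interpreting its `limit` argument.
    let execution_props = ExecutionProps::new();
    let info = SimplifyContext::new(&execution_props);
    let simplified = ExprSimplifier::new(info).simplify(lit(1i64) + lit(2i64))?;
    assert_eq!(simplified, lit(3i64));
    Ok(())
}
```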
3 changes: 1 addition & 2 deletions datafusion/common/Cargo.toml
@@ -36,12 +36,11 @@ name = "datafusion_common"
path = "src/lib.rs"

[features]
-default = ["recursive-protection"]
avro = ["apache-avro"]
backtrace = []
pyarrow = ["pyo3", "arrow/pyarrow", "parquet"]
force_hash_collisions = []
-recursive-protection = ["dep:recursive"]
+recursive_protection = ["dep:recursive"]

[dependencies]
ahash = { workspace = true }
2 changes: 1 addition & 1 deletion datafusion/common/src/column.rs
@@ -230,7 +230,7 @@ impl Column {
.collect::<Vec<_>>();
for using_col in using_columns {
let all_matched = columns.iter().all(|c| using_col.contains(c));
-// All matched fields belong to the same using column set, in orther words
+// All matched fields belong to the same using column set, in other words
// the same join clause. We simply pick the qualifier from the first match.
if all_matched {
return Ok(columns[0].clone());
4 changes: 2 additions & 2 deletions datafusion/common/src/config.rs
@@ -904,12 +904,12 @@ pub trait ConfigExtension: ExtensionOptions {
pub trait ExtensionOptions: Send + Sync + fmt::Debug + 'static {
/// Return `self` as [`Any`]
///
-/// This is needed until trait upcasting is stabilised
+/// This is needed until trait upcasting is stabilized
fn as_any(&self) -> &dyn Any;

/// Return `self` as [`Any`]
///
-/// This is needed until trait upcasting is stabilised
+/// This is needed until trait upcasting is stabilized
fn as_any_mut(&mut self) -> &mut dyn Any;

/// Return a deep clone of this [`ExtensionOptions`]
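
Background on why these methods exist: without stabilized trait upcasting, a trait object cannot be cast to `&dyn Any` directly, so the trait itself must provide the conversion. A standalone sketch of the workaround; the types here are hypothetical:

```rust
use std::any::Any;

trait ExtensionLike {
    // The trait hands out `&dyn Any` itself, since `&dyn ExtensionLike`
    // cannot be upcast to `&dyn Any` implicitly.
    fn as_any(&self) -> &dyn Any;
}

struct MyOptions {
    verbose: bool,
}

impl ExtensionLike for MyOptions {
    fn as_any(&self) -> &dyn Any {
        self
    }
}

fn main() {
    let opts: Box<dyn ExtensionLike> = Box::new(MyOptions { verbose: true });
    // Callers recover the concrete type by downcasting through `as_any`.
    let concrete = opts.as_any().downcast_ref::<MyOptions>().unwrap();
    assert!(concrete.verbose);
}
```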
2 changes: 1 addition & 1 deletion datafusion/common/src/cse.rs
@@ -60,7 +60,7 @@ pub trait Normalizeable {
}

/// The `NormalizeEq` trait extends `Eq` and `Normalizeable` to provide a method for comparing
-/// normlized nodes in optimizations like Common Subexpression Elimination (CSE).
+/// normalized nodes in optimizations like Common Subexpression Elimination (CSE).
///
/// The `normalize_eq` method ensures that two nodes that are semantically equivalent (after normalization)
/// are considered equal in CSE optimization, even if their original forms differ.
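
The idea behind `normalize_eq`, reduced to a toy example: put commutative expressions into a canonical order first, then compare, so `a + b` and `b + a` are treated as the same subexpression. All types below are hypothetical stand-ins for DataFusion's internal nodes:

```rust
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum Node {
    Col(String),
    Add(Box<Node>, Box<Node>),
}

// Normalization: order the operands of the commutative `Add` canonically.
fn normalize(n: Node) -> Node {
    match n {
        Node::Add(l, r) => {
            let (l, r) = (normalize(*l), normalize(*r));
            if l <= r {
                Node::Add(Box::new(l), Box::new(r))
            } else {
                Node::Add(Box::new(r), Box::new(l))
            }
        }
        other => other,
    }
}

// Two nodes are "normalize-equal" if their canonical forms match.
fn normalize_eq(a: Node, b: Node) -> bool {
    normalize(a) == normalize(b)
}

fn main() {
    let ab = Node::Add(Box::new(Node::Col("a".into())), Box::new(Node::Col("b".into())));
    let ba = Node::Add(Box::new(Node::Col("b".into())), Box::new(Node::Col("a".into())));
    assert!(normalize_eq(ab, ba));
}
```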
2 changes: 1 addition & 1 deletion datafusion/common/src/dfschema.rs
@@ -955,7 +955,7 @@ pub trait ExprSchema: std::fmt::Debug {
/// Returns the column's optional metadata.
fn metadata(&self, col: &Column) -> Result<&HashMap<String, String>>;

-/// Return the coulmn's datatype and nullability
+/// Return the column's datatype and nullability
fn data_type_and_nullable(&self, col: &Column) -> Result<(&DataType, bool)>;
}

2 changes: 1 addition & 1 deletion datafusion/common/src/error.rs
@@ -115,7 +115,7 @@ pub enum DataFusionError {
Execution(String),
/// [`JoinError`] during execution of the query.
///
-/// This error can unoccur for unjoined tasks, such as execution shutdown.
+/// This error can't occur for unjoined tasks, such as execution shutdown.
ExecutionJoin(JoinError),
/// Error when resources (such as memory of scratch disk space) are exhausted.
///
10 changes: 5 additions & 5 deletions datafusion/common/src/scalar/mod.rs
@@ -2216,7 +2216,7 @@ impl ScalarValue {
///
/// Errors if `self` is
/// - a decimal that fails be converted to a decimal array of size
-/// - a `Fixedsizelist` that fails to be concatenated into an array of size
+/// - a `FixedsizeList` that fails to be concatenated into an array of size
/// - a `List` that fails to be concatenated into an array of size
/// - a `Dictionary` that fails be converted to a dictionary array of size
pub fn to_array_of_size(&self, size: usize) -> Result<ArrayRef> {
@@ -2925,7 +2925,7 @@ impl ScalarValue {
/// preferred over this function if at all possible as they can be
/// vectorized and are generally much faster.
///
-/// This function has a few narrow usescases such as hash table key
+/// This function has a few narrow use cases such as hash table key
/// comparisons where comparing a single row at a time is necessary.
///
/// # Errors
@@ -4465,7 +4465,7 @@ mod tests {
Ok(())
}

-// Verifies that ScalarValue has the same behavior with compute kernal when it overflows.
+// Verifies that ScalarValue has the same behavior with compute kernel when it overflows.
fn check_scalar_add_overflow<T>(left: ScalarValue, right: ScalarValue)
where
T: ArrowNumericType,
@@ -6150,9 +6150,9 @@ mod tests {
&DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
);

-let newscalar = ScalarValue::try_from_array(&array, 0).unwrap();
+let new_scalar = ScalarValue::try_from_array(&array, 0).unwrap();
assert_eq!(
-newscalar.data_type(),
+new_scalar.data_type(),
DataType::Timestamp(TimeUnit::Nanosecond, Some("UTC".into()))
);
}
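
The two conversions touched by these test fixes, shown together; this round trip uses only APIs visible in the diff (`to_array_of_size` and `try_from_array`):

```rust
use arrow::array::Array;
use datafusion_common::{Result, ScalarValue};

fn main() -> Result<()> {
    let scalar = ScalarValue::Int64(Some(42));
    // Expand a single scalar into an array of three identical values...
    let array = scalar.to_array_of_size(3)?;
    assert_eq!(array.len(), 3);
    // ...and read one element back out as a scalar.
    let new_scalar = ScalarValue::try_from_array(&array, 0)?;
    assert_eq!(new_scalar, scalar);
    Ok(())
}
```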
18 changes: 9 additions & 9 deletions datafusion/common/src/tree_node.rs
@@ -124,7 +124,7 @@ pub trait TreeNode: Sized {
/// TreeNodeVisitor::f_up(ChildNode2)
/// TreeNodeVisitor::f_up(ParentNode)
/// ```
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn visit<'n, V: TreeNodeVisitor<'n, Node = Self>>(
&'n self,
visitor: &mut V,
@@ -174,7 +174,7 @@
/// TreeNodeRewriter::f_up(ChildNode2)
/// TreeNodeRewriter::f_up(ParentNode)
/// ```
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn rewrite<R: TreeNodeRewriter<Node = Self>>(
self,
rewriter: &mut R,
@@ -197,7 +197,7 @@
&'n self,
mut f: F,
) -> Result<TreeNodeRecursion> {
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn apply_impl<'n, N: TreeNode, F: FnMut(&'n N) -> Result<TreeNodeRecursion>>(
node: &'n N,
f: &mut F,
@@ -232,7 +232,7 @@
self,
mut f: F,
) -> Result<Transformed<Self>> {
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_impl<N: TreeNode, F: FnMut(N) -> Result<Transformed<N>>>(
node: N,
f: &mut F,
Expand All @@ -256,7 +256,7 @@ pub trait TreeNode: Sized {
self,
mut f: F,
) -> Result<Transformed<Self>> {
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_up_impl<N: TreeNode, F: FnMut(N) -> Result<Transformed<N>>>(
node: N,
f: &mut F,
@@ -371,7 +371,7 @@
mut f_down: FD,
mut f_up: FU,
) -> Result<Transformed<Self>> {
-#[cfg_attr(feature = "recursive-protection", recursive::recursive)]
+#[cfg_attr(feature = "recursive_protection", recursive::recursive)]
fn transform_down_up_impl<
N: TreeNode,
FD: FnMut(N) -> Result<Transformed<N>>,
@@ -995,11 +995,11 @@ impl<
/// construct a temporary container to be able to call `apply_ref_elements` on a
/// collection of tree node references. But in that case the container's temporary
/// lifetime is different to the lifetime of tree nodes that we put into it.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::Case` case.
+/// Please find an example use case in `Expr::apply_children` with the `Expr::Case` case.
///
/// Most of the cases we don't need to create a temporary container with
/// `TreeNodeRefContainer`, but we can just call `TreeNodeContainer::apply_elements`.
-/// Please find an example usecase in `Expr::apply_children` with the `Expr::GroupingSet`
+/// Please find an example use case in `Expr::apply_children` with the `Expr::GroupingSet`
/// case.
pub trait TreeNodeRefContainer<'a, T: 'a>: Sized {
/// Applies `f` to all elements of the container.
@@ -2349,7 +2349,7 @@ pub(crate) mod tests {
Ok(())
}

-#[cfg(feature = "recursive-protection")]
+#[cfg(feature = "recursive_protection")]
#[test]
fn test_large_tree() {
let mut item = TestTreeNode::new_leaf("initial".to_string());
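
A compact illustration of the pre-order traversal described in these docs, using `TreeNode::apply` on an `Expr` (crate paths assume `datafusion-common` and `datafusion-expr`):

```rust
use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion};
use datafusion_common::Result;
use datafusion_expr::{col, lit};

fn main() -> Result<()> {
    // (a + 1) > b
    let expr = (col("a") + lit(1)).gt(col("b"));
    let mut seen = vec![];
    expr.apply(|e| {
        seen.push(e.to_string());
        Ok(TreeNodeRecursion::Continue)
    })?;
    // `apply` visits a parent before its children (the `f_down` order), so
    // the root comparison is recorded first.
    assert!(seen[0].contains('>'));
    Ok(())
}
```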
4 changes: 2 additions & 2 deletions datafusion/common/src/utils/memory.rs
@@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.

-//! This module provides a function to estimate the memory size of a HashTable prior to alloaction
+//! This module provides a function to estimate the memory size of a HashTable prior to allocation
use crate::{DataFusionError, Result};
use std::mem::size_of;
@@ -79,7 +79,7 @@ pub fn estimate_memory_size<T>(num_elements: usize, fixed_size: usize) -> Result
// For the majority of cases hashbrown overestimates the bucket quantity
// to keep ~1/8 of them empty. We take this factor into account by
// multiplying the number of elements with a fixed ratio of 8/7 (~1.14).
-// This formula leads to overallocation for small tables (< 8 elements)
+// This formula leads to over-allocation for small tables (< 8 elements)
// but should be fine overall.
num_elements
.checked_mul(8)
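
A usage sketch of the function this comment belongs to, with the arithmetic spelled out; the fixed overhead value is made up:

```rust
use datafusion_common::utils::memory::estimate_memory_size;
use datafusion_common::Result;

fn main() -> Result<()> {
    // For 1000 u64 entries: ~1000 * 8/7 = 1143 buckets, rounded up to the
    // next power of two (2048), times 8 bytes each, plus the fixed overhead.
    let fixed_overhead = 64; // hypothetical size of the table struct itself
    let estimate = estimate_memory_size::<u64>(1000, fixed_overhead)?;
    assert!(estimate >= 2048 * std::mem::size_of::<u64>());
    Ok(())
}
```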
(Diffs for the remaining changed files are not shown.)
