Skip to content

Commit

Permalink
Merge branch 'main' into 13525/invariant-checking-for-implicit-LP-cha…
Browse files Browse the repository at this point in the history
…nges
  • Loading branch information
wiedld committed Dec 17, 2024
2 parents a7d9770 + 57d1309 commit 72718ad
Show file tree
Hide file tree
Showing 213 changed files with 5,608 additions and 3,164 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -585,9 +585,9 @@ jobs:
#
# To reproduce:
# 1. Install the version of Rust that is failing. Example:
# rustup install 1.80.0
# rustup install 1.80.1
# 2. Run the command that failed with that version. Example:
# cargo +1.80.0 check -p datafusion
# cargo +1.80.1 check -p datafusion
#
# To resolve, either:
# 1. Change your code to use older Rust features,
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ datafusion/sqllogictest/test_files/scratch*
# temp file for core
datafusion/core/*.parquet

# Generated core benchmark data
datafusion/core/benches/data/*

# rat
filtered_rat.txt
rat.txt
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ homepage = "https://datafusion.apache.org"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/datafusion"
rust-version = "1.80"
rust-version = "1.80.1"
version = "43.0.0"

[workspace.dependencies]
Expand Down Expand Up @@ -93,7 +93,7 @@ arrow-ipc = { version = "53.3.0", default-features = false, features = [
arrow-ord = { version = "53.3.0", default-features = false }
arrow-schema = { version = "53.3.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.6"
bigdecimal = "0.4.7"
bytes = "1.4"
chrono = { version = "0.4.38", default-features = false }
ctor = "0.2.0"
Expand Down
20 changes: 13 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,16 +126,22 @@ Optional features:

## Rust Version Compatibility Policy

DataFusion's Minimum Required Stable Rust Version (MSRV) policy is to support stable [4 latest
Rust versions](https://releases.rs) OR the stable minor Rust version as of 4 months, whichever is lower.
The Rust toolchain releases are tracked at [Rust Versions](https://releases.rs) and follow
[semantic versioning](https://semver.org/). A Rust toolchain release can be identified
by a version string like `1.80.0`, or more generally `major.minor.patch`.

DataFusion supports the last 4 stable Rust minor versions released and any such versions released within the last 4 months.

For example, given the releases `1.78.0`, `1.79.0`, `1.80.0`, `1.80.1` and `1.81.0`, DataFusion will support `1.78.0`, which is 3 minor versions prior to the most recent minor version, `1.81`.

If a hotfix is released for the minimum supported Rust version (MSRV), the MSRV will be the minor version with all hotfixes, even if it surpasses the four-month window.
Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes; this rule takes precedence over the other policies.

DataFusion enforces MSRV policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)

We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
## DataFusion API Evolution and Deprecation Guidelines

## DataFusion API evolution policy
Public methods in Apache DataFusion evolve over time: while we try to maintain a
stable API, we also improve the API over time. As a result, we typically
deprecate methods before removing them, according to the [deprecation guidelines].

Public methods in Apache DataFusion are subject to evolve as part of the API lifecycle.
Deprecated methods will be phased out in accordance with the [policy](https://datafusion.apache.org/library-user-guide/api-health.html), ensuring the API is stable and healthy.
[deprecation guidelines]: https://datafusion.apache.org/library-user-guide/api-health.html
35 changes: 18 additions & 17 deletions benchmarks/src/bin/external_aggr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::LazyLock;
use structopt::StructOpt;

use arrow::record_batch::RecordBatch;
Expand All @@ -33,7 +33,8 @@ use datafusion::datasource::{MemTable, TableProvider};
use datafusion::error::Result;
use datafusion::execution::memory_pool::FairSpillPool;
use datafusion::execution::memory_pool::{human_readable_size, units};
use datafusion::execution::runtime_env::RuntimeConfig;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_plan::display::DisplayableExecutionPlan;
use datafusion::physical_plan::{collect, displayable};
use datafusion::prelude::*;
Expand Down Expand Up @@ -90,7 +91,13 @@ struct QueryResult {
/// Memory limits to run: 64MiB, 32MiB, 16MiB
/// Q2 requires 250MiB for aggregation
/// Memory limits to run: 512MiB, 256MiB, 128MiB, 64MiB, 32MiB
static QUERY_MEMORY_LIMITS: OnceLock<HashMap<usize, Vec<u64>>> = OnceLock::new();
static QUERY_MEMORY_LIMITS: LazyLock<HashMap<usize, Vec<u64>>> = LazyLock::new(|| {
    use units::*;
    // Keyed by query id; each value lists the memory limits that query is run with.
    HashMap::from([
        (1, vec![64 * MB, 32 * MB, 16 * MB]),
        (2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]),
    ])
});

impl ExternalAggrConfig {
const AGGR_TABLES: [&'static str; 1] = ["lineitem"];
Expand All @@ -113,16 +120,6 @@ impl ExternalAggrConfig {
"#,
];

/// Returns the shared table of pre-configured memory limits, keyed by query id.
///
/// The table is built on the first call through `QUERY_MEMORY_LIMITS.get_or_init`;
/// every call hands back a reference to that same `'static` map.
fn init_query_memory_limits() -> &'static HashMap<usize, Vec<u64>> {
use units::*;
QUERY_MEMORY_LIMITS.get_or_init(|| {
let mut map = HashMap::new();
// Query 1 is run with three limits, query 2 with five (largest first).
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
map
})
}

/// If `--query` and `--memory-limit` are not specified, run all queries
/// with pre-configured memory limits
/// If only `--query` is specified, run the query with all memory limits
Expand Down Expand Up @@ -160,8 +157,7 @@ impl ExternalAggrConfig {
query_executions.push((query_id, limit));
}
None => {
let memory_limits_table = Self::init_query_memory_limits();
let memory_limits = memory_limits_table.get(&query_id).unwrap();
let memory_limits = QUERY_MEMORY_LIMITS.get(&query_id).unwrap();
for limit in memory_limits {
query_executions.push((query_id, *limit));
}
Expand Down Expand Up @@ -195,10 +191,15 @@ impl ExternalAggrConfig {
let query_name =
format!("Q{query_id}({})", human_readable_size(mem_limit as usize));
let config = self.common.config();
let runtime_config = RuntimeConfig::new()
let runtime_env = RuntimeEnvBuilder::new()
.with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize)))
.build_arc()?;
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
let state = SessionStateBuilder::new()
.with_config(config)
.with_runtime_env(runtime_env)
.with_default_features()
.build();
let ctx = SessionContext::from(state);

// register tables
self.register_tables(&ctx).await?;
Expand Down
10 changes: 6 additions & 4 deletions benchmarks/src/sort_tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion::datasource::listing::{
};
use datafusion::datasource::{MemTable, TableProvider};
use datafusion::error::Result;
use datafusion::execution::runtime_env::RuntimeConfig;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_plan::display::DisplayableExecutionPlan;
use datafusion::physical_plan::{displayable, execute_stream};
use datafusion::prelude::*;
Expand Down Expand Up @@ -188,9 +188,11 @@ impl RunOpt {
/// Benchmark query `query_id` in `SORT_QUERIES`
async fn benchmark_query(&self, query_id: usize) -> Result<Vec<QueryResult>> {
let config = self.common.config();

let runtime_config = RuntimeConfig::new().build_arc()?;
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
let state = SessionStateBuilder::new()
.with_config(config)
.with_default_features()
.build();
let ctx = SessionContext::from(state);

// register tables
self.register_tables(&ctx).await?;
Expand Down
Loading

0 comments on commit 72718ad

Please sign in to comment.