Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reorganize the project folders #2081

Merged
merged 3 commits into from
Mar 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ jobs:
export PARQUET_TEST_DATA=$(pwd)/parquet-testing/data
cd datafusion
# Force all hash values to collide
cargo test --features=force_hash_collisions
cargo test --all --features=force_hash_collisions
env:
CARGO_HOME: "/github/home/.cargo"
CARGO_TARGET_DIR: "/github/home/target"
Expand Down
14 changes: 7 additions & 7 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,21 +17,21 @@

[workspace]
members = [
"datafusion",
"datafusion-common",
"datafusion-expr",
"datafusion-jit",
"datafusion-physical-expr",
"datafusion/core",
"datafusion/common",
"datafusion/expr",
"datafusion/jit",
"datafusion/physical-expr",
"datafusion/proto",
"datafusion-cli",
"datafusion-examples",
"datafusion-proto",
"datafusion-storage",
"benchmarks",
"ballista/rust/client",
"ballista/rust/core",
"ballista/rust/executor",
"ballista/rust/scheduler",
"ballista-examples",
"data-access",
]

[profile.release]
Expand Down
2 changes: 1 addition & 1 deletion ballista-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ required-features = ["ballista/standalone"]

[dependencies]
ballista = { path = "../ballista/rust/client", version = "0.6.0" }
datafusion = { path = "../datafusion" }
datafusion = { path = "../datafusion/core" }
futures = "0.3"
num_cpus = "1.13.0"
prost = "0.9"
Expand Down
2 changes: 1 addition & 1 deletion ballista/rust/client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ ballista-core = { path = "../core", version = "0.6.0" }
ballista-executor = { path = "../executor", version = "0.6.0", optional = true }
ballista-scheduler = { path = "../scheduler", version = "0.6.0", optional = true }

datafusion = { path = "../../../datafusion", version = "7.0.0" }
datafusion = { path = "../../../datafusion/core", version = "7.0.0" }
futures = "0.3"
log = "0.4"
parking_lot = "0.12"
Expand Down
6 changes: 4 additions & 2 deletions ballista/rust/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ edition = "2018"
build = "build.rs"

[features]
# Used for testing ONLY: causes all values to hash to the same value (test for collisions)
force_hash_collisions = ["datafusion/force_hash_collisions"]
simd = ["datafusion/simd"]

[dependencies]
Expand All @@ -36,8 +38,8 @@ arrow-flight = { version = "11" }
async-trait = "0.1.41"
chrono = { version = "0.4", default-features = false }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../../../datafusion", version = "7.0.0" }
datafusion-proto = { path = "../../../datafusion-proto", version = "7.0.0" }
datafusion = { path = "../../../datafusion/core", version = "7.0.0" }
datafusion-proto = { path = "../../../datafusion/proto", version = "7.0.0" }
futures = "0.3"
hashbrown = "0.12"
log = "0.4"
Expand Down
4 changes: 4 additions & 0 deletions ballista/rust/core/src/execution_plans/shuffle_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,8 @@ mod tests {
use tempfile::TempDir;

#[tokio::test]
// number of rows in each partition is a function of the hash output, so don't test here
#[cfg(not(feature = "force_hash_collisions"))]
async fn test() -> Result<()> {
let session_ctx = SessionContext::new();
let task_ctx = session_ctx.task_ctx();
Expand Down Expand Up @@ -507,6 +509,8 @@ mod tests {
}

#[tokio::test]
// number of rows in each partition is a function of the hash output, so don't test here
#[cfg(not(feature = "force_hash_collisions"))]
async fn test_partitioned() -> Result<()> {
let session_ctx = SessionContext::new();
let task_ctx = session_ctx.task_ctx();
Expand Down
8 changes: 4 additions & 4 deletions ballista/rust/core/src/serde/logical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ mod roundtrip_tests {
use core::panic;
use datafusion::{
arrow::datatypes::{DataType, Field, Schema},
datafusion_storage::{
datafusion_data_access::{
self,
object_store::{
local::LocalFileSystem, FileMetaStream, ListEntryStream, ObjectReader,
Expand All @@ -949,7 +949,7 @@ mod roundtrip_tests {
async fn list_file(
&self,
_prefix: &str,
) -> datafusion_storage::Result<FileMetaStream> {
) -> datafusion_data_access::Result<FileMetaStream> {
Err(io::Error::new(
io::ErrorKind::Unsupported,
"this is only a test object store".to_string(),
Expand All @@ -960,7 +960,7 @@ mod roundtrip_tests {
&self,
_prefix: &str,
_delimiter: Option<String>,
) -> datafusion_storage::Result<ListEntryStream> {
) -> datafusion_data_access::Result<ListEntryStream> {
Err(io::Error::new(
io::ErrorKind::Unsupported,
"this is only a test object store".to_string(),
Expand All @@ -970,7 +970,7 @@ mod roundtrip_tests {
fn file_reader(
&self,
_file: SizedFile,
) -> datafusion_storage::Result<Arc<dyn ObjectReader>> {
) -> datafusion_data_access::Result<Arc<dyn ObjectReader>> {
Err(io::Error::new(
io::ErrorKind::Unsupported,
"this is only a test object store".to_string(),
Expand Down
4 changes: 2 additions & 2 deletions ballista/rust/core/src/serde/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,7 @@ fn str_to_byte(s: &str) -> Result<u8, BallistaError> {
mod tests {
use async_trait::async_trait;
use datafusion::arrow::datatypes::SchemaRef;
use datafusion::datafusion_storage::object_store::local::LocalFileSystem;
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
use datafusion::error::DataFusionError;
use datafusion::execution::context::{QueryPlanner, SessionState, TaskContext};
use datafusion::execution::runtime_env::{RuntimeConfig, RuntimeEnv};
Expand Down Expand Up @@ -715,7 +715,7 @@ mod tests {

let scan = LogicalPlanBuilder::scan_csv(
store,
"../../../datafusion/tests/customer.csv",
"../../../datafusion/core/tests/customer.csv",
CsvReadOptions::default(),
None,
1,
Expand Down
2 changes: 1 addition & 1 deletion ballista/rust/core/src/serde/physical_plan/from_proto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use crate::serde::{from_proto_binary_op, proto_error, protobuf};
use crate::{convert_box_required, convert_required};
use chrono::{TimeZone, Utc};

use datafusion::datafusion_storage::{
use datafusion::datafusion_data_access::{
object_store::local::LocalFileSystem, FileMeta, SizedFile,
};
use datafusion::datasource::listing::PartitionedFile;
Expand Down
4 changes: 2 additions & 2 deletions ballista/rust/core/src/serde/physical_plan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use crate::serde::{
use crate::{convert_box_required, convert_required, into_physical_plan, into_required};
use datafusion::arrow::compute::SortOptions;
use datafusion::arrow::datatypes::SchemaRef;
use datafusion::datafusion_storage::object_store::local::LocalFileSystem;
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
use datafusion::datasource::listing::PartitionedFile;
use datafusion::execution::runtime_env::RuntimeEnv;
use datafusion::logical_plan::window_frames::WindowFrame;
Expand Down Expand Up @@ -1020,7 +1020,7 @@ mod roundtrip_tests {
compute::kernels::sort::SortOptions,
datatypes::{DataType, Field, Schema},
},
datafusion_storage::object_store::local::LocalFileSystem,
datafusion_data_access::object_store::local::LocalFileSystem,
datasource::listing::PartitionedFile,
logical_plan::{JoinType, Operator},
physical_plan::{
Expand Down
2 changes: 1 addition & 1 deletion ballista/rust/executor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ async-trait = "0.1.41"
ballista-core = { path = "../core", version = "0.6.0" }
chrono = { version = "0.4", default-features = false }
configure_me = "0.4.0"
datafusion = { path = "../../../datafusion", version = "7.0.0" }
datafusion = { path = "../../../datafusion/core", version = "7.0.0" }
env_logger = "0.9"
futures = "0.3"
hyper = "0.14.4"
Expand Down
2 changes: 1 addition & 1 deletion ballista/rust/scheduler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ async-trait = "0.1.41"
ballista-core = { path = "../core", version = "0.6.0" }
clap = { version = "3", features = ["derive", "cargo"] }
configure_me = "0.4.0"
datafusion = { path = "../../../datafusion", version = "7.0.0" }
datafusion = { path = "../../../datafusion/core", version = "7.0.0" }
env_logger = "0.9"
etcd-client = { version = "0.8", optional = true }
futures = "0.3"
Expand Down
4 changes: 3 additions & 1 deletion ballista/rust/scheduler/src/scheduler_server/grpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ use ballista_core::serde::scheduler::{
ExecutorData, ExecutorDataChange, ExecutorMetadata,
};
use ballista_core::serde::{AsExecutionPlan, AsLogicalPlan};
use datafusion::datafusion_storage::object_store::{local::LocalFileSystem, ObjectStore};
use datafusion::datafusion_data_access::object_store::{
local::LocalFileSystem, ObjectStore,
};
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::file_format::FileFormat;
use futures::StreamExt;
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ snmalloc = ["snmalloc-rs"]

[dependencies]
ballista = { path = "../ballista/rust/client" }
datafusion = { path = "../datafusion" }
datafusion = { path = "../datafusion/core" }
env_logger = "0.9"
futures = "0.3"
mimalloc = { version = "0.1", optional = true, default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/src/bin/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ use datafusion::{
};
use datafusion::{
arrow::util::pretty,
datafusion_storage::object_store::local::LocalFileSystem,
datafusion_data_access::object_store::local::LocalFileSystem,
datasource::listing::{ListingOptions, ListingTable, ListingTableConfig},
};

Expand Down
8 changes: 4 additions & 4 deletions datafusion-storage/Cargo.toml → data-access/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
# under the License.

[package]
name = "datafusion-storage"
description = "Storage for DataFusion query engine"
version = "7.0.0"
name = "datafusion-data-access"
description = "General data access layer currently mainly based on the object store interfaces"
version = "1.0.0"
homepage = "https://github.com/apache/arrow-datafusion"
repository = "https://github.com/apache/arrow-datafusion"
readme = "README.md"
Expand All @@ -29,7 +29,7 @@ edition = "2021"
rust-version = "1.59"

[lib]
name = "datafusion_storage"
name = "datafusion_data_access"
path = "src/lib.rs"

[dependencies]
Expand Down
6 changes: 2 additions & 4 deletions datafusion-storage/README.md → data-access/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
under the License.
-->

# DataFusion Storage
# Data Access Layer

This module contains an `async` API for the [DataFusion][df] to access data, either remotely or locally.

[df]: https://crates.io/crates/datafusion
This module contains an `async` API for accessing data, either remotely or locally. Currently, it's based on the object store interfaces. In the future, this module may include interfaces for accessing databases, or streaming data.
File renamed without changes.
2 changes: 1 addition & 1 deletion datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ rust-version = "1.59"
arrow = { version = "11" }
ballista = { path = "../ballista/rust/client", version = "0.6.0", optional = true }
clap = { version = "3", features = ["derive", "cargo"] }
datafusion = { path = "../datafusion", version = "7.0.0" }
datafusion = { path = "../datafusion/core", version = "7.0.0" }
dirs = "4.0.0"
env_logger = "0.9"
mimalloc = { version = "*", default-features = false }
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ required-features = ["datafusion/avro"]
[dev-dependencies]
arrow-flight = { version = "11" }
async-trait = "0.1.41"
datafusion = { path = "../datafusion" }
datafusion = { path = "../datafusion/core" }
futures = "0.3"
num_cpus = "1.13.0"
prost = "0.9"
Expand Down
2 changes: 1 addition & 1 deletion datafusion-examples/examples/flight_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use std::pin::Pin;
use std::sync::Arc;

use arrow_flight::SchemaAsIpc;
use datafusion::datafusion_storage::object_store::local::LocalFileSystem;
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
use datafusion::datasource::file_format::parquet::ParquetFormat;
use datafusion::datasource::listing::ListingOptions;
use futures::Stream;
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 5 additions & 5 deletions datafusion/Cargo.toml → datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ arrow = { version = "11", features = ["prettyprint"] }
async-trait = "0.1.41"
avro-rs = { version = "0.13", features = ["snappy"], optional = true }
chrono = { version = "0.4", default-features = false }
datafusion-common = { path = "../datafusion-common", version = "7.0.0", features = ["parquet"] }
datafusion-expr = { path = "../datafusion-expr", version = "7.0.0" }
datafusion-jit = { path = "../datafusion-jit", version = "7.0.0", optional = true }
datafusion-physical-expr = { path = "../datafusion-physical-expr", version = "7.0.0" }
datafusion-storage = { path = "../datafusion-storage", version = "7.0.0" }
datafusion-common = { path = "../common", version = "7.0.0", features = ["parquet"] }
datafusion-data-access = { path = "../../data-access", version = "1.0.0" }
datafusion-expr = { path = "../expr", version = "7.0.0" }
datafusion-jit = { path = "../jit", version = "7.0.0", optional = true }
datafusion-physical-expr = { path = "../physical-expr", version = "7.0.0" }
futures = "0.3"
hashbrown = { version = "0.12", features = ["raw"] }
lazy_static = { version = "^1.4.0" }
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#[macro_use]
extern crate criterion;
use criterion::Criterion;
use datafusion::datafusion_storage::object_store::local::LocalFileSystem;
use datafusion::datafusion_data_access::object_store::local::LocalFileSystem;
use datafusion::datasource::file_format::csv::CsvFormat;
use datafusion::datasource::listing::{ListingOptions, ListingTable, ListingTableConfig};

Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ use crate::datasource::listing::{ListingTable, ListingTableConfig};
use crate::datasource::object_store_registry::ObjectStoreRegistry;
use crate::datasource::TableProvider;
use crate::error::{DataFusionError, Result};
use datafusion_storage::object_store::ObjectStore;
use datafusion_data_access::object_store::ObjectStore;

/// Represents a schema, comprising a number of named tables.
pub trait SchemaProvider: Sync + Send {
Expand Down Expand Up @@ -251,7 +251,7 @@ mod tests {
use crate::catalog::schema::{
MemorySchemaProvider, ObjectStoreSchemaProvider, SchemaProvider,
};
use crate::datafusion_storage::object_store::local::LocalFileSystem;
use crate::datafusion_data_access::object_store::local::LocalFileSystem;
use crate::datasource::empty::EmptyTable;
use crate::execution::context::SessionContext;

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use crate::logical_plan::Expr;
use crate::physical_plan::file_format::{AvroExec, FileScanConfig};
use crate::physical_plan::ExecutionPlan;
use crate::physical_plan::Statistics;
use datafusion_storage::object_store::{ObjectReader, ObjectReaderStream};
use datafusion_data_access::object_store::{ObjectReader, ObjectReaderStream};

/// The default file extension of avro files
pub const DEFAULT_AVRO_EXTENSION: &str = ".avro";
Expand Down Expand Up @@ -75,7 +75,7 @@ impl FileFormat for AvroFormat {
#[cfg(feature = "avro")]
mod tests {
use crate::{
datafusion_storage::object_store::local::{
datafusion_data_access::object_store::local::{
local_object_reader, local_object_reader_stream, LocalFileSystem,
},
physical_plan::collect,
Expand Down Expand Up @@ -394,7 +394,7 @@ mod tests {
mod tests {
use super::*;

use crate::datafusion_storage::object_store::local::local_object_reader_stream;
use crate::datafusion_data_access::object_store::local::local_object_reader_stream;
use crate::error::DataFusionError;

#[tokio::test]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use crate::logical_plan::Expr;
use crate::physical_plan::file_format::{CsvExec, FileScanConfig};
use crate::physical_plan::ExecutionPlan;
use crate::physical_plan::Statistics;
use datafusion_storage::object_store::{ObjectReader, ObjectReaderStream};
use datafusion_data_access::object_store::{ObjectReader, ObjectReaderStream};

/// The default file extension of csv files
pub const DEFAULT_CSV_EXTENSION: &str = ".csv";
Expand Down Expand Up @@ -141,7 +141,7 @@ mod tests {
use crate::datasource::listing::local_unpartitioned_file;
use crate::prelude::{SessionConfig, SessionContext};
use crate::{
datafusion_storage::object_store::local::{
datafusion_data_access::object_store::local::{
local_object_reader, local_object_reader_stream, LocalFileSystem,
},
datasource::file_format::FileScanConfig,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ use crate::logical_plan::Expr;
use crate::physical_plan::file_format::NdJsonExec;
use crate::physical_plan::ExecutionPlan;
use crate::physical_plan::Statistics;
use datafusion_storage::object_store::{ObjectReader, ObjectReaderStream};
use datafusion_data_access::object_store::{ObjectReader, ObjectReaderStream};

/// The default file extension of json files
pub const DEFAULT_JSON_EXTENSION: &str = ".json";
Expand Down Expand Up @@ -104,7 +104,7 @@ mod tests {
use super::*;
use crate::prelude::{SessionConfig, SessionContext};
use crate::{
datafusion_storage::object_store::local::{
datafusion_data_access::object_store::local::{
local_object_reader, local_object_reader_stream, LocalFileSystem,
},
datasource::{file_format::FileScanConfig, listing::local_unpartitioned_file},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ use crate::physical_plan::{ExecutionPlan, Statistics};

use async_trait::async_trait;

use datafusion_storage::object_store::{ObjectReader, ObjectReaderStream};
use datafusion_data_access::object_store::{ObjectReader, ObjectReaderStream};

/// This trait abstracts all the file format specific implementations
/// from the `TableProvider`. This helps code re-utilization across
Expand Down
Loading