Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Define and use new AsAny trait #450

Merged
merged 5 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 0 additions & 9 deletions kernel/src/engine/arrow_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use arrow_array::{Array, GenericListArray, MapArray, OffsetSizeTrait, RecordBatc
use arrow_schema::{ArrowError, DataType as ArrowDataType};
use tracing::{debug, warn};

use std::any::Any;
use std::collections::HashMap;
use std::sync::Arc;

Expand Down Expand Up @@ -48,14 +47,6 @@ impl EngineData for ArrowEngineData {
fn length(&self) -> usize {
self.data.num_rows()
}

fn as_any(&self) -> &dyn Any {
self
}

fn into_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}

impl From<RecordBatch> for ArrowEngineData {
Expand Down
2 changes: 1 addition & 1 deletion kernel/src/engine/arrow_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ pub struct DefaultExpressionEvaluator {
impl ExpressionEvaluator for DefaultExpressionEvaluator {
fn evaluate(&self, batch: &dyn EngineData) -> DeltaResult<Box<dyn EngineData>> {
let batch = batch
.as_any()
.any_ref()
.downcast_ref::<ArrowEngineData>()
.ok_or_else(|| Error::engine_data_type("ArrowEngineData"))?
.record_batch();
Expand Down
12 changes: 2 additions & 10 deletions kernel/src/engine_data.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
//! Traits that engines need to implement in order to pass data between themselves and kernel.

use crate::{schema::SchemaRef, DeltaResult, Error};
use crate::{schema::SchemaRef, AsAny, DeltaResult, Error};

use tracing::debug;

use std::any::Any;
use std::collections::HashMap;

/// a trait that an engine exposes to give access to a list
Expand Down Expand Up @@ -212,8 +211,6 @@ pub trait DataVisitor {
/// }
///
/// impl EngineData for MyDataType {
/// fn as_any(&self) -> &dyn Any { self }
/// fn into_any(self: Box<Self>) -> Box<dyn Any> { self }
/// fn extract(&self, schema: SchemaRef, visitor: &mut dyn DataVisitor) -> DeltaResult<()> {
/// let getters = self.do_extraction(); // do the extraction
/// let row_count = self.length();
Expand All @@ -226,7 +223,7 @@ pub trait DataVisitor {
/// }
/// }
/// ```
pub trait EngineData: Send + Sync {
pub trait EngineData: AsAny {
/// Request that the data be visited for the passed schema. The contract of this method is that
/// it will call back into the passed [`DataVisitor`]s `visit` method. The call to `visit` must
/// include `GetData` items for each leaf of the schema, as well as the number of rows in this
Expand All @@ -235,9 +232,4 @@ pub trait EngineData: Send + Sync {

/// Return the number of items (rows) in blob
fn length(&self) -> usize;

// TODO(nick) implement this and below here in the trait when it doesn't cause a compiler error
fn as_any(&self) -> &dyn Any;

fn into_any(self: Box<Self>) -> Box<dyn Any>;
}
131 changes: 125 additions & 6 deletions kernel/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
rust_2021_compatibility
)]

use std::any::Any;
use std::sync::Arc;
use std::{cmp::Ordering, ops::Range};

Expand Down Expand Up @@ -136,12 +137,130 @@ impl FileMeta {
}
}

/// Extension trait that makes it easier to work with trait objects that implement [`Any`],
/// implemented automatically for any type that satisfies `Any`, `Send`, and `Sync`. In particular,
/// given some `trait T: Any + Send + Sync`, it allows upcasting `T` to `dyn Any + Send + Sync`,
/// which in turn allows downcasting the result to a concrete type. For example:
///
/// ```
/// # use delta_kernel::AsAny;
/// # use std::any::Any;
/// # use std::sync::Arc;
/// trait Foo : AsAny {}
/// struct Bar;
/// impl Foo for Bar {}
///
/// let f: Arc<dyn Foo> = Arc::new(Bar);
/// let a: Arc<dyn Any + Send + Sync> = f.as_any();
/// let b: Arc<Bar> = a.downcast().unwrap();
/// ```
///
/// In contrast, very similar code that relies only on `Any` would fail to compile:
///
/// ```compile_fail
/// # use std::any::Any;
/// # use std::sync::Arc;
/// trait Foo: Any + Send + Sync {}
///
/// struct Bar;
/// impl Foo for Bar {}
///
/// let f: Arc<dyn Foo> = Arc::new(Bar);
/// let b: Arc<Bar> = f.downcast().unwrap(); // `Arc::downcast` method not found
/// ```
///
/// As would this, on any toolchain where trait upcasting coercion is not stable (it was
/// stabilized in Rust 1.86). The example is not compiled as a doctest because its outcome
/// depends on the toolchain version:
///
/// ```ignore
/// # use std::any::Any;
/// # use std::sync::Arc;
/// trait Foo: Any + Send + Sync {}
///
/// struct Bar;
/// impl Foo for Bar {}
///
/// let f: Arc<dyn Foo> = Arc::new(Bar);
/// let a: Arc<dyn Any + Send + Sync> = f; // requires trait upcasting coercion
/// let f: Arc<Bar> = a.downcast().unwrap();
/// ```
///
/// NOTE: `AsAny` inherits the `Send + Sync` constraint from [`Arc::downcast`].
pub trait AsAny: Any + Send + Sync {
    /// Obtains a `dyn Any` reference to the object:
    ///
    /// ```
    /// # use delta_kernel::AsAny;
    /// # use std::any::Any;
    /// # use std::sync::Arc;
    /// trait Foo : AsAny {}
    /// struct Bar;
    /// impl Foo for Bar {}
    ///
    /// let f: &dyn Foo = &Bar;
    /// let a: &dyn Any = f.any_ref();
    /// let b: &Bar = a.downcast_ref().unwrap();
    /// ```
    fn any_ref(&self) -> &(dyn Any + Send + Sync);

    /// Obtains an `Arc<dyn Any>` reference to the object:
    ///
    /// ```
    /// # use delta_kernel::AsAny;
    /// # use std::any::Any;
    /// # use std::sync::Arc;
    /// trait Foo : AsAny {}
    /// struct Bar;
    /// impl Foo for Bar {}
    ///
    /// let f: Arc<dyn Foo> = Arc::new(Bar);
    /// let a: Arc<dyn Any + Send + Sync> = f.as_any();
    /// let b: Arc<Bar> = a.downcast().unwrap();
    /// ```
    fn as_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;

    /// Converts the object to `Box<dyn Any>`:
    ///
    /// ```
    /// # use delta_kernel::AsAny;
    /// # use std::any::Any;
    /// # use std::sync::Arc;
    /// trait Foo : AsAny {}
    /// struct Bar;
    /// impl Foo for Bar {}
    ///
    /// let f: Box<dyn Foo> = Box::new(Bar);
    /// let a: Box<dyn Any> = f.into_any();
    /// let b: Box<Bar> = a.downcast().unwrap();
    /// ```
    fn into_any(self: Box<Self>) -> Box<dyn Any + Send + Sync>;

    /// Convenient wrapper for [`std::any::type_name`], since [`Any`] does not provide it and
    /// [`Any::type_id`] is useless as a debugging aid (its `Debug` is just a mess of hex digits).
    fn type_name(&self) -> &'static str;
}

// Blanket implementation: every sized `Any + Send + Sync` type is automatically `AsAny`, so
// implementors of traits that require `AsAny` never write these methods by hand.
impl<T> AsAny for T
where
    T: Any + Send + Sync,
{
    // Each conversion below simply returns `self`; the change from the concrete `T` to
    // `dyn Any + Send + Sync` is an unsizing coercion at the return site.
    fn any_ref(&self) -> &(dyn Any + Send + Sync) {
        self
    }

    fn as_any(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
        self
    }

    fn into_any(self: Box<Self>) -> Box<dyn Any + Send + Sync> {
        self
    }

    // Reports the name of the concrete implementing type (`Self` is `T` in this impl).
    fn type_name(&self) -> &'static str {
        std::any::type_name::<T>()
    }
}

/// Trait for implementing an Expression evaluator.
///
/// It contains one Expression which can be evaluated on multiple ColumnarBatches.
/// Connectors can implement this trait to optimize the evaluation using the
/// connector specific capabilities.
pub trait ExpressionEvaluator: Send + Sync {
pub trait ExpressionEvaluator: AsAny {
/// Evaluate the expression on a given EngineData.
///
/// Contains one value for each row of the input.
Expand All @@ -153,7 +272,7 @@ pub trait ExpressionEvaluator: Send + Sync {
///
/// Delta Kernel can use this handler to evaluate predicate on partition filters,
/// fill up partition column values and any computation on data using Expressions.
pub trait ExpressionHandler: Send + Sync {
pub trait ExpressionHandler: AsAny {
/// Create an [`ExpressionEvaluator`] that can evaluate the given [`Expression`]
/// on columnar batches with the given [`Schema`] to produce data of [`DataType`].
///
Expand All @@ -178,7 +297,7 @@ pub trait ExpressionHandler: Send + Sync {
/// Delta Kernel uses this client whenever it needs to access the underlying
/// file system where the Delta table is present. Connector implementation of
/// this trait can hide filesystem specific details from Delta Kernel.
pub trait FileSystemClient: Send + Sync {
pub trait FileSystemClient: AsAny {
/// List the paths in the same directory that are lexicographically greater or equal to
/// (UTF-8 sorting) the given `path`. The result should also be sorted by the file name.
fn list_from(&self, path: &Url)
Expand All @@ -196,7 +315,7 @@ pub trait FileSystemClient: Send + Sync {
/// Delta Kernel can use this client to parse JSON strings into Row or read content from JSON files.
/// Connectors can leverage this trait to provide their best implementation of the JSON parsing
/// capability to Delta Kernel.
pub trait JsonHandler: Send + Sync {
pub trait JsonHandler: AsAny {
/// Parse the given json strings and return the fields requested by output schema as columns in [`EngineData`].
/// json_strings MUST be a single column batch of engine data, and the column type must be string
fn parse_json(
Expand Down Expand Up @@ -254,7 +373,7 @@ pub trait JsonHandler: Send + Sync {
///
/// Connectors can leverage this trait to provide their own custom
/// implementation of Parquet data file functionalities to Delta Kernel.
pub trait ParquetHandler: Send + Sync {
pub trait ParquetHandler: AsAny {
/// Read and parse the Parquet file at given locations and return the data as EngineData with
/// the columns requested by physical schema. The ParquetHandler _must_ return exactly the
/// columns specified in `physical_schema`, and they _must_ be in schema order.
Expand All @@ -277,7 +396,7 @@ pub trait ParquetHandler: Send + Sync {
///
/// Engines/Connectors are expected to pass an implementation of this trait when reading a Delta
/// table.
pub trait Engine: Send + Sync {
pub trait Engine: AsAny {
/// Get the connector provided [`ExpressionHandler`].
fn get_expression_handler(&self) -> Arc<dyn ExpressionHandler>;

Expand Down
Loading