diff --git a/Cargo.toml b/Cargo.toml index db2c13b..3d34cc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ include = [ [features] default = [] -parse = ["dep:hex", "dep:thiserror", "semver"] +parse = ["dep:hex", "dep:thiserror", "dep:url", "semver"] protoc = ["dep:protobuf-src"] semver = ["dep:semver"] serde = ["dep:pbjson", "dep:pbjson-build", "dep:pbjson-types"] @@ -37,6 +37,7 @@ pbjson = { version = "0.6.0", optional = true } pbjson-types = { version = "0.6.0", optional = true } prost = "0.12.3" prost-types = "0.12.3" +url = { version = "2.5.0", optional = true } semver = { version = "1.0.22", optional = true } serde = { version = "1.0.197", features = ["derive"] } serde_json = "1.0.114" diff --git a/src/parse/context.rs b/src/parse/context.rs index 8c762d3..fe3ff14 100644 --- a/src/parse/context.rs +++ b/src/parse/context.rs @@ -2,7 +2,11 @@ //! A parse context. -use crate::parse::Parse; +use thiserror::Error; + +use crate::parse::{ + proto::extensions::SimpleExtensionUri, text::simple_extensions::SimpleExtensions, Anchor, Parse, +}; /// A parse context. /// @@ -18,12 +22,101 @@ pub trait Context { { item.parse(self) } + + /// Add a [SimpleExtensionUri] to this context. Must return an error for duplicate + /// anchors or when the URI is not supported. + /// + /// This function must eagerly resolve and parse the simple extension, returning an + /// error if either fails. + fn add_simple_extension_uri( + &mut self, + simple_extension_uri: &SimpleExtensionUri, + ) -> Result<&SimpleExtensions, ContextError>; + + /// Returns the simple extensions for the given simple extension anchor. + fn simple_extensions( + &self, + anchor: &Anchor, + ) -> Result<&SimpleExtensions, ContextError>; +} + +/// Parse context errors. +#[derive(Debug, Error, PartialEq)] +pub enum ContextError { + /// Undefined reference to simple extension. 
+ #[error("undefined reference to simple extension with anchor `{0}`")] + UndefinedSimpleExtension(Anchor), + + /// Duplicate anchor for simple extension. + #[error("duplicate anchor `{0}` for simple extension")] + DuplicateSimpleExtension(Anchor), + + /// Unsupported simple extension URI. + #[error("unsupported simple extension URI: {0}")] + UnsupportedURI(String), } #[cfg(test)] pub(crate) mod tests { - #[derive(Default)] - pub struct Context {} + use std::collections::{hash_map::Entry, HashMap}; - impl super::Context for Context {} + use crate::parse::{ + context::ContextError, proto::extensions::SimpleExtensionUri, + text::simple_extensions::SimpleExtensions, Anchor, + }; + + /// A test context. + /// + /// This currently mocks support for simple extensions (does not resolve or + /// parse). + pub struct Context { + empty_simple_extensions: SimpleExtensions, + simple_extensions: HashMap, SimpleExtensionUri>, + } + + impl Default for Context { + fn default() -> Self { + Self { + empty_simple_extensions: SimpleExtensions {}, + simple_extensions: Default::default(), + } + } + } + + impl super::Context for Context { + fn add_simple_extension_uri( + &mut self, + simple_extension_uri: &crate::parse::proto::extensions::SimpleExtensionUri, + ) -> Result<&SimpleExtensions, ContextError> { + match self.simple_extensions.entry(simple_extension_uri.anchor()) { + Entry::Occupied(_) => Err(ContextError::DuplicateSimpleExtension( + simple_extension_uri.anchor(), + )), + Entry::Vacant(entry) => { + // This is where we would resolve and then parse. + // This check shows the use of the unsupported uri error. + if let "http" | "https" | "file" = simple_extension_uri.uri().scheme() { + entry.insert(simple_extension_uri.clone()); + // Here we just return an empty simple extensions. 
+ Ok(&self.empty_simple_extensions) + } else { + Err(ContextError::UnsupportedURI(format!( + "`{}` scheme not supported", + simple_extension_uri.uri().scheme() + ))) + } + } + } + } + + fn simple_extensions( + &self, + anchor: &Anchor, + ) -> Result<&SimpleExtensions, ContextError> { + self.simple_extensions + .contains_key(anchor) + .then_some(&self.empty_simple_extensions) + .ok_or(ContextError::UndefinedSimpleExtension(*anchor)) + } + } } diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 4eb9d18..6bcb09d 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -18,6 +18,10 @@ mod context; pub use context::Context; pub mod proto; +pub mod text; + +mod typed; +pub use typed::Anchor; /// A parse trait. pub trait Parse: Debug + Sized { diff --git a/src/parse/proto/extensions/mod.rs b/src/parse/proto/extensions/mod.rs new file mode 100644 index 0000000..2527805 --- /dev/null +++ b/src/parse/proto/extensions/mod.rs @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Parsing of [proto::extensions] types. + +mod simple_extension_uri; +pub use simple_extension_uri::SimpleExtensionUri; diff --git a/src/parse/proto/extensions/simple_extension_uri.rs b/src/parse/proto/extensions/simple_extension_uri.rs new file mode 100644 index 0000000..8fcb6df --- /dev/null +++ b/src/parse/proto/extensions/simple_extension_uri.rs @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Parsing of [proto::extensions::SimpleExtensionUri]. + +use thiserror::Error; +use url::Url; + +use crate::{ + parse::{context::ContextError, Anchor, Context, Parse}, + proto, +}; + +/// A parsed [proto::extensions::SimpleExtensionUri]. +#[derive(Clone, Debug, PartialEq)] +pub struct SimpleExtensionUri { + /// The URI of this simple extension. + uri: Url, + + /// The anchor value of this simple extension. + anchor: Anchor, +} + +impl SimpleExtensionUri { + /// Returns the uri of this simple extension. + /// + /// See [proto::extensions::SimpleExtensionUri::uri]. 
+ pub fn uri(&self) -> &Url { + &self.uri + } + + /// Returns the anchor value of this simple extension. + /// + /// See [proto::extensions::SimpleExtensionUri::extension_uri_anchor]. + pub fn anchor(&self) -> Anchor { + self.anchor + } +} + +/// Parse errors for [proto::extensions::SimpleExtensionUri]. +#[derive(Debug, Error, PartialEq)] +pub enum SimpleExtensionUriError { + /// Invalid URI + #[error("invalid URI: {0}")] + InvalidURI(#[from] url::ParseError), + + /// Context error + #[error(transparent)] + Context(#[from] ContextError), +} + +impl Parse for proto::extensions::SimpleExtensionUri { + type Parsed = SimpleExtensionUri; + type Error = SimpleExtensionUriError; + + fn parse(self, ctx: &mut C) -> Result { + let proto::extensions::SimpleExtensionUri { + extension_uri_anchor: anchor, + uri, + } = self; + + // The uri is required and must be valid. + let uri = Url::parse(&uri)?; + + // Construct the parsed simple extension URI. + let simple_extension_uri = SimpleExtensionUri { + uri, + anchor: Anchor::new(anchor), + }; + + // Make sure the URI is supported by this parse context, resolves and + // parses, and the anchor is unique. 
+ ctx.add_simple_extension_uri(&simple_extension_uri)?; + + Ok(simple_extension_uri) + } +} + +impl From for proto::extensions::SimpleExtensionUri { + fn from(simple_extension_uri: SimpleExtensionUri) -> Self { + let SimpleExtensionUri { uri, anchor } = simple_extension_uri; + proto::extensions::SimpleExtensionUri { + uri: uri.to_string(), + extension_uri_anchor: anchor.into_inner(), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::parse::{context::tests::Context, Context as _}; + + #[test] + fn parse() -> Result<(), SimpleExtensionUriError> { + let simple_extension_uri = proto::extensions::SimpleExtensionUri { + extension_uri_anchor: 1, + uri: "https://substrait.io".to_string(), + }; + let simple_extension_uri = simple_extension_uri.parse(&mut Context::default())?; + assert_eq!(simple_extension_uri.anchor(), Anchor::new(1)); + assert_eq!(simple_extension_uri.uri().as_str(), "https://substrait.io/"); + Ok(()) + } + + #[test] + fn invalid_uri() { + let simple_extension_uri = proto::extensions::SimpleExtensionUri::default(); + assert_eq!( + simple_extension_uri.parse(&mut Context::default()), + Err(SimpleExtensionUriError::InvalidURI( + url::ParseError::RelativeUrlWithoutBase + )) + ); + let simple_extension_uri = proto::extensions::SimpleExtensionUri { + extension_uri_anchor: 1, + uri: "http://".to_string(), + }; + assert_eq!( + simple_extension_uri.parse(&mut Context::default()), + Err(SimpleExtensionUriError::InvalidURI( + url::ParseError::EmptyHost + )) + ); + } + + #[test] + fn duplicate_simple_extension() { + let mut ctx = Context::default(); + let simple_extension_uri = proto::extensions::SimpleExtensionUri { + extension_uri_anchor: 1, + uri: "https://substrait.io".to_string(), + }; + assert!(ctx.parse(simple_extension_uri.clone()).is_ok()); + assert_eq!( + ctx.parse(simple_extension_uri), + Err(SimpleExtensionUriError::Context( + ContextError::DuplicateSimpleExtension(Anchor::new(1)) + )) + ); + } + + #[test] + fn unsupported_uri() { + let 
simple_extension_uri = proto::extensions::SimpleExtensionUri { + extension_uri_anchor: 1, + uri: "ftp://substrait.io".to_string(), + }; + assert_eq!( + simple_extension_uri.parse(&mut Context::default()), + Err(SimpleExtensionUriError::Context( + ContextError::UnsupportedURI("`ftp` scheme not supported".to_string()) + )) + ); + } +} diff --git a/src/parse/proto/mod.rs b/src/parse/proto/mod.rs index 1dc4b2c..04c048d 100644 --- a/src/parse/proto/mod.rs +++ b/src/parse/proto/mod.rs @@ -7,3 +7,5 @@ pub use version::{Version, VersionError}; mod plan_version; pub use plan_version::{PlanVersion, PlanVersionError}; + +pub mod extensions; diff --git a/src/parse/proto/plan_version.rs b/src/parse/proto/plan_version.rs index c987ee3..6e95659 100644 --- a/src/parse/proto/plan_version.rs +++ b/src/parse/proto/plan_version.rs @@ -76,11 +76,12 @@ mod tests { }; #[test] - fn parse() { + fn parse() -> Result<(), PlanVersionError> { let plan_version = proto::PlanVersion { version: Some(version::version()), }; - assert!(plan_version.parse(&mut Context::default()).is_ok()); + plan_version.parse(&mut Context::default())?; + Ok(()) } #[test] diff --git a/src/parse/proto/version.rs b/src/parse/proto/version.rs index a7570b5..59e42fe 100644 --- a/src/parse/proto/version.rs +++ b/src/parse/proto/version.rs @@ -145,7 +145,7 @@ mod tests { use crate::parse::context::tests::Context; #[test] - fn version() { + fn version() -> Result<(), VersionError> { let version = proto::Version::default(); assert_eq!( version.parse(&mut Context::default()), @@ -153,7 +153,8 @@ mod tests { ); let version = version::version(); - assert!(version.parse(&mut Context::default()).is_ok()); + version.parse(&mut Context::default())?; + Ok(()) } #[test] @@ -217,7 +218,8 @@ mod tests { producer: String::from(""), ..version::version() }; - assert!(version.parse(&mut Context::default())?.producer.is_none()); + let version = version.parse(&mut Context::default())?; + assert!(version.producer.is_none()); Ok(()) } diff 
--git a/src/parse/text/mod.rs b/src/parse/text/mod.rs new file mode 100644 index 0000000..4f7b641 --- /dev/null +++ b/src/parse/text/mod.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Parsing of [text](crate::text) types. + +pub mod simple_extensions; diff --git a/src/parse/text/simple_extensions/mod.rs b/src/parse/text/simple_extensions/mod.rs new file mode 100644 index 0000000..6145079 --- /dev/null +++ b/src/parse/text/simple_extensions/mod.rs @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! Parsing of [text::simple_extensions] types. + +use thiserror::Error; + +use crate::{ + parse::{Context, Parse}, + text, +}; + +/// A parsed [text::simple_extensions::SimpleExtensions]. +pub struct SimpleExtensions { + // TODO +} + +/// Parse errors for [text::simple_extensions::SimpleExtensions]. +#[derive(Debug, Error, PartialEq)] +pub enum SimpleExtensionsError { + // TODO +} + +impl Parse for text::simple_extensions::SimpleExtensions { + type Parsed = SimpleExtensions; + type Error = SimpleExtensionsError; + + fn parse(self, _ctx: &mut C) -> Result { + // let text::simple_extensions::SimpleExtensions { + // aggregate_functions, + // dependencies, + // scalar_functions, + // type_variations, + // types, + // window_functions, + // } = self; + + todo!("text::simple_extensions::SimpleExtensions - https://github.com/substrait-io/substrait-rs/issues/157") + } +} + +impl From for text::simple_extensions::SimpleExtensions { + fn from(_value: SimpleExtensions) -> Self { + todo!("text::simple_extensions::SimpleExtensions - https://github.com/substrait-io/substrait-rs/issues/157") + } +} diff --git a/src/parse/typed.rs b/src/parse/typed.rs new file mode 100644 index 0000000..991e7b5 --- /dev/null +++ b/src/parse/typed.rs @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: Apache-2.0 + +//! A generic new type wrapper. 
+ +use std::{any, fmt, hash, marker::PhantomData}; + +/// A generic new type wrapper for values of type `T` that belong to items of type `U`. +pub struct Typed { + /// The wrapped value. + value: T, + /// The attached type. + _type: PhantomData, +} + +impl Typed { + /// Internal method to construct a new wrapper from a value. + pub(crate) fn new(value: T) -> Self { + Self { + value, + _type: PhantomData, + } + } + + /// Returns a reference to the wrapped value. + pub fn value(&self) -> &T { + &self.value + } + + /// Returns the inner value. + pub fn into_inner(self) -> T { + self.value + } +} + +impl, U, V: ?Sized> AsRef for Typed { + fn as_ref(&self) -> &V { + self.value.as_ref() + } +} + +impl Clone for Typed { + fn clone(&self) -> Self { + Self { + value: self.value.clone(), + _type: self._type, + } + } +} + +impl Copy for Typed {} + +impl fmt::Debug for Typed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple(any::type_name::()) + .field(&self.value) + .finish() + } +} + +impl fmt::Display for Typed { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.value, f) + } +} + +impl PartialEq for Typed { + fn eq(&self, other: &Self) -> bool { + self.value == other.value + } +} + +impl Eq for Typed {} + +impl hash::Hash for Typed { + fn hash(&self, state: &mut H) { + self.value.hash(state); + } +} + +/// A generic anchor new type for the anchor mechanism used in Substrait data. +pub type Anchor = Typed;