Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implement breaking changes and deprecations for Substrait 0.5.0 #24

Merged
merged 2 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,25 @@ This repository contains an EXPERIMENTAL validator for
Rust, but bindings are available for Python and C. Other languages may use the
C API via their respective foreign function interface systems.

Substrait version support
-------------------------

Currently, each version of the validator only targets a subset of the available
Substrait versions. Whenever Substrait makes a breaking change that affects
validation, the validator will be updated accordingly and drop support for the
older version. Refer to the table below for the version compatibility matrix.

| Substrait... | ... is supported by validator ... |
| ------------- | ------------------------------------ |
| 0.7.x and up | not yet supported |
| 0.5.x - 0.6.x | current version |
| 0.3.x - 0.4.x | 0.0.4 - 0.0.5 |
| older | try 0.0.1, but your mileage may vary |

As Substrait and the validator stabilize and breaking changes become less
frequent, the intention is to support more versions within a single validator
version.

Command-line interface
----------------------

Expand Down
71 changes: 28 additions & 43 deletions rs/src/parse/relations/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,42 +101,42 @@ fn parse_path_type(
}
}

/// Parse file format, yielding a string description of the format if known.
fn parse_file_format(
x: &substrait::read_rel::local_files::file_or_files::FileFormat,
y: &mut context::Context,
) -> diagnostic::Result<Option<String>> {
Ok(match x {
substrait::read_rel::local_files::file_or_files::FileFormat::Parquet(_) => {
Some(String::from("Parquet"))
}
substrait::read_rel::local_files::file_or_files::FileFormat::Arrow(_) => {
Some(String::from("Arrow IPC"))
}
substrait::read_rel::local_files::file_or_files::FileFormat::Orc(_) => {
Some(String::from("Orc"))
}
substrait::read_rel::local_files::file_or_files::FileFormat::Extension(x) => {
extensions::advanced::parse_functional_any(x, y)?;
None
}
})
}

/// Parse file entry.
fn parse_file_or_files(
x: &substrait::read_rel::local_files::FileOrFiles,
y: &mut context::Context,
extension_present: bool,
) -> diagnostic::Result<()> {
// Parse path.
let multiple = proto_required_field!(x, y, path_type, parse_path_type)
.1
.unwrap_or_default();

// Parse read configuration.
let format = proto_enum_field!(
x,
y,
format,
substrait::read_rel::local_files::file_or_files::FileFormat,
|x, y| {
if !extension_present
&& matches!(
x,
substrait::read_rel::local_files::file_or_files::FileFormat::Unspecified
)
{
diagnostic!(
y,
Error,
IllegalValue,
"file format must be specified when no enhancement extension is present"
);
}
Ok(*x)
}
)
.1
.unwrap_or_default();
let format = proto_required_field!(x, y, file_format, parse_file_format)
.1
.flatten();
proto_primitive_field!(x, y, partition_index);
proto_primitive_field!(x, y, start);
proto_primitive_field!(x, y, length);
Expand Down Expand Up @@ -177,11 +177,8 @@ fn parse_file_or_files(
}
summary!(y, "a single");
}
match format {
substrait::read_rel::local_files::file_or_files::FileFormat::Unspecified => {}
substrait::read_rel::local_files::file_or_files::FileFormat::Parquet => {
summary!(y, "Parquet");
}
if let Some(format) = format {
summary!(y, "{}", format);
}
if multiple {
summary!(y, "files");
Expand All @@ -198,19 +195,7 @@ fn parse_local_files(
y: &mut context::Context,
) -> diagnostic::Result<SourceInfo> {
// Parse fields.
let extension_present = x
.advanced_extension
.as_ref()
.and_then(|x| x.enhancement.as_ref())
.is_some();
proto_required_repeated_field!(
x,
y,
items,
parse_file_or_files,
|_, _, _, _, _| (),
extension_present
);
proto_required_repeated_field!(x, y, items, parse_file_or_files, |_, _, _, _, _| (),);
proto_field!(
x,
y,
Expand Down
67 changes: 62 additions & 5 deletions rs/src/parse/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,15 @@ pub fn parse_map(x: &substrait::r#type::Map, y: &mut context::Context) -> diagno
Ok(())
}

/// Parses a user-defined type.
pub fn parse_user_defined(x: &u32, y: &mut context::Context) -> diagnostic::Result<()> {
// FIXME: why can't user-defined types also have type variations associated
// with them?
/// Parses a pre-0.5.0 user-defined type.
pub fn parse_legacy_user_defined(x: &u32, y: &mut context::Context) -> diagnostic::Result<()> {
// Signal the deprecation.
diagnostic!(
y,
Warning,
Deprecation,
"this field was deprecated in favor of user_defined in Substrait 0.5.0"
);

// Parse fields.
let user_type = extensions::simple::parse_type_reference(x, y)
Expand All @@ -466,6 +471,57 @@ pub fn parse_user_defined(x: &u32, y: &mut context::Context) -> diagnostic::Resu
Ok(())
}

/// Parses a 0.6.0+ user-defined type.
pub fn parse_user_defined(
x: &substrait::r#type::UserDefined,
y: &mut context::Context,
) -> diagnostic::Result<()> {
// Parse fields.
let user_type = proto_primitive_field!(
x,
y,
type_reference,
extensions::simple::parse_type_reference
)
.1;
let nullable = proto_enum_field!(
x,
y,
nullability,
substrait::r#type::Nullability,
parse_required_nullability
)
.1;
let variation = proto_primitive_field!(
x,
y,
type_variation_reference,
extensions::simple::parse_type_variation_reference
)
.1;

// Convert to internal type object.
let data_type = if let (Some(user_type), Some(nullable), Some(variation)) =
(user_type, nullable, variation)
{
data_type::DataType::new(
data_type::Class::UserDefined(user_type),
nullable,
variation,
vec![],
)
.map_err(|e| diagnostic!(y, Error, e))
.unwrap_or_default()
} else {
Arc::default()
};

// Attach the type to the node.
y.set_data_type(data_type);

Ok(())
}

/// Parses a type kind.
pub fn parse_type_kind(
x: &substrait::r#type::Kind,
Expand Down Expand Up @@ -495,7 +551,8 @@ pub fn parse_type_kind(
substrait::r#type::Kind::Struct(x) => parse_struct(x, y),
substrait::r#type::Kind::List(x) => parse_list(x, y),
substrait::r#type::Kind::Map(x) => parse_map(x, y),
substrait::r#type::Kind::UserDefinedTypeReference(x) => parse_user_defined(x, y),
substrait::r#type::Kind::UserDefinedTypeReference(x) => parse_legacy_user_defined(x, y),
substrait::r#type::Kind::UserDefined(x) => parse_user_defined(x, y),
}
}

Expand Down
2 changes: 1 addition & 1 deletion rs/src/resources/substrait-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
v0.3.0
v0.6.0
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: read-files-basic
name: read-files-arrow
plan:
__test: [ level: i ]
relations:
Expand All @@ -13,4 +13,4 @@ plan:
localFiles:
items:
- uriPath: "1/2/3"
format: FILE_FORMAT_PARQUET
arrow: {}
3 changes: 1 addition & 2 deletions tests/tests/relations/read/file-table/extension-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ plan:
localFiles:
items:
- uriPath: "1/2/3"
advancedExtension:
enhancement:
extension:
"@type": substrait.Plan
expectedTypeUrls:
- substrait.Plan
2 changes: 1 addition & 1 deletion tests/tests/relations/read/file-table/missing-format.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ plan:
localFiles:
items:
- uriPath: "1/2/3"
format__test: [ diag: { level: e, code: 2, msg: "*file format must be specified*" } ]
__test: [ diag: { level: e, code: 1002, msg: "*file_format*" } ]
16 changes: 16 additions & 0 deletions tests/tests/relations/read/file-table/orc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: read-files-orc
plan:
__test: [ level: i ]
relations:
- rel:
read:
baseSchema:
names: [a]
struct:
nullability: NULLABILITY_REQUIRED
types:
- string: { nullability: NULLABILITY_REQUIRED }
localFiles:
items:
- uriPath: "1/2/3"
orc: {}
16 changes: 16 additions & 0 deletions tests/tests/relations/read/file-table/parquet.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: read-files-parquet
plan:
__test: [ level: i ]
relations:
- rel:
read:
baseSchema:
names: [a]
struct:
nullability: NULLABILITY_REQUIRED
types:
- string: { nullability: NULLABILITY_REQUIRED }
localFiles:
items:
- uriPath: "1/2/3"
parquet: {}
8 changes: 4 additions & 4 deletions tests/tests/relations/read/file-table/partial.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ plan:
localFiles:
items:
- uriFile: "a/b/c"
format: FILE_FORMAT_PARQUET
parquet: {}
partitionIndex: 3
- uriFolder: "a/b/c"
format: FILE_FORMAT_PARQUET
parquet: {}
partitionIndex: 3
- uriFile: "a/b/c"
format: FILE_FORMAT_PARQUET
parquet: {}
start: 10
length: 20
- uriFolder: "a/b/c"
format: FILE_FORMAT_PARQUET
parquet: {}
start: 10
length: 20
__test:
Expand Down
32 changes: 16 additions & 16 deletions tests/tests/relations/read/file-table/uri-validation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,37 +14,37 @@ plan:
items:
- uriPath: "not a valid URI"
uriPath__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ]
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: "a%20valid%20relative%20URI"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: "/path/to/local/file"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: "file:///path/to/local/file"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: "protocol://with.an.authority/path/goes/here"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: "protocol:urn"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: 'C:\windows\paths\are\not\uris'
uriPath__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ]
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: 'file://C:/write/them/like/this'
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPath: 'C:/or/like/this'
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPathGlob: '/can/have/*/and/?/in/path/globs'
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPathGlob: 'file:///can/have/*/and/?/in/path/globs'
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPathGlob: '/character/classes/must/be/escaped/[cls]'
uriPathGlob__test: [ diag: { level: e, code: 4, msg: "*invalid path character*" } ]
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPathGlob: '/character/classes/must/be/escaped/%5Bcls%5D'
format: FILE_FORMAT_PARQUET
parquet: {}
- uriPathGlob: '/invalid/glob/syntax/%5Dcls%5B'
uriPathGlob__test: [ diag: { level: e, code: 5, msg: "*invalid range pattern*" } ]
format: FILE_FORMAT_PARQUET
parquet: {}
- uriFile: "/path/to/local/file"
format: FILE_FORMAT_PARQUET
parquet: {}
- uriFolder: "/path/to/local/folder"
format: FILE_FORMAT_PARQUET
parquet: {}