Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add antlr grammar for test file format #728

Merged
merged 6 commits into from
Nov 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
ignore = E203, E266, E501, W503, F403, F401
max-line-length = 88
select = B,C,E,F,W,T4,B9
exclude = tests/coverage/antlr_parser/*.py
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ repos:
rev: 6.1.0
hooks:
- id: flake8
- repo: local
hooks:
- id: check-substrait-extensions
name: Check Substrait extensions
entry: pytest tests/test_extensions.py::test_read_substrait_extensions
language: python
pass_filenames: false

104 changes: 104 additions & 0 deletions grammar/FuncTestCaseLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
lexer grammar FuncTestCaseLexer;

import SubstraitLexer;

options {
caseInsensitive = true;
}

Whitespace : [ \t\n\r]+ -> channel(HIDDEN) ;

TripleHash: '###';
SubstraitScalarTest: 'SUBSTRAIT_SCALAR_TEST';
SubstraitInclude: 'SUBSTRAIT_INCLUDE';

FormatVersion
: 'v' DIGIT+ ('.' DIGIT+)?
;

DescriptionLine
: '# ' ~[\r\n]* '\r'? '\n'
;

ErrorResult: '<!ERROR>';
UndefineResult: '<!UNDEFINED>';
Overflow: 'OVERFLOW';
Rounding: 'ROUNDING';
Error: 'ERROR';
Saturate: 'SATURATE';
Silent: 'SILENT';
TieToEven: 'TIE_TO_EVEN';
NaN: 'NAN';

IntegerLiteral
: [+-]? Int
;

DecimalLiteral
: [+-]? [0-9]+ ('.' [0-9]+)?
;

FloatLiteral
: [+-]? [0-9]+ ('.' [0-9]*)? ( 'E' [+-]? [0-9]+ )?
| [+-]? 'inf'
| 'snan'
;

BooleanLiteral
: 'true' | 'false'
;

// Helper fragments for fixed-width numeric fields; fragments never produce
// tokens on their own.
fragment FourDigits: [0-9][0-9][0-9][0-9];
fragment TwoDigits: [0-9][0-9];

// Quoted timestamp with a mandatory UTC offset, optional fractional seconds.
// Example of a matching input: '2021-01-01T12:00:00.123+05:30'
TimestampTzLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )?
[+-] TwoDigits ':' TwoDigits '\''
;

// Quoted timestamp without an offset, optional fractional seconds.
// Example of a matching input: '2021-01-01T12:00:00'
TimestampLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits 'T' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
;

// Quoted time of day, optional fractional seconds.
// Example of a matching input: '12:00:00.5'
TimeLiteral
: '\'' TwoDigits ':' TwoDigits ':' TwoDigits ( '.' [0-9]+ )? '\''
;

// Quoted calendar date. Only the digit layout is checked here; the pattern
// does not range-check month or day values.
// Example of a matching input: '2021-01-31'
DateLiteral
: '\'' FourDigits '-' TwoDigits '-' TwoDigits '\''
;

PeriodPrefix: 'P';
TimePrefix: 'T';
YearPrefix: 'Y';
MSuffix: 'M'; // used for both months and minutes
DaySuffix: 'D';
HourSuffix: 'H';
SecondSuffix: 'S';
FractionalSecondSuffix: 'F';
OAngleBracket: Lt;
CAngleBracket: Gt;

IntervalYearLiteral
: '\'' PeriodPrefix IntegerLiteral YearPrefix (IntegerLiteral MSuffix)? '\''
| '\'' PeriodPrefix IntegerLiteral MSuffix '\''
;

IntervalDayLiteral
: '\'' PeriodPrefix IntegerLiteral DaySuffix (TimePrefix TimeInterval)? '\''
| '\'' PeriodPrefix TimePrefix TimeInterval '\''
;

fragment TimeInterval
: IntegerLiteral HourSuffix (IntegerLiteral MSuffix)? (IntegerLiteral SecondSuffix)?
(IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral MSuffix (IntegerLiteral SecondSuffix)? (IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral SecondSuffix (IntegerLiteral FractionalSecondSuffix)?
| IntegerLiteral FractionalSecondSuffix
;

NullLiteral: 'null';

// Single-quoted string. Between the quotes each element is one of:
//   - a backslash followed by any single character,
//   - two consecutive single quotes,
//   - any character other than a single quote or a backslash.
// The quoted date/time/interval rules earlier in this file match inputs that
// this pattern would also match at the same length; ANTLR resolves such ties
// in favour of the rule listed first, so this rule must stay after them.
StringLiteral
: '\'' ('\\' . | '\'\'' | ~['\\])* '\''
;
215 changes: 215 additions & 0 deletions grammar/FuncTestCaseParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Parser for the function test-case file format (header lines followed by
// groups of `name(args) [options] = result` test cases).
parser grammar FuncTestCaseParser;

// Token types are taken from FuncTestCaseLexer only. That lexer grammar
// imports SubstraitLexer, so its vocabulary already contains the Substrait
// tokens; assigning tokenVocab a second time to SubstraitLexer was redundant
// (an option can hold a single value).
options {
    caseInsensitive = true;
    tokenVocab = FuncTestCaseLexer;
}

doc
: header testGroup+ EOF
;

header
: version include
;

version
: TripleHash SubstraitScalarTest Colon FormatVersion
;

include
: TripleHash SubstraitInclude Colon StringLiteral (Comma StringLiteral)*
;

testGroupDescription
: DescriptionLine
;

testCase
: functionName=Identifier OParen arguments CParen ( OBracket func_options CBracket )? Eq result
;

testGroup
: testGroupDescription (testCase)+
;

arguments
: argument (Comma argument)*
;

result
: argument
| substraitError
;

argument
: nullArg
| intArg
| floatArg
| booleanArg
| stringArg
| decimalArg
| dateArg
| timeArg
| timestampArg
| timestampTzArg
| intervalYearArg
| intervalDayArg
;

numericLiteral
: DecimalLiteral | IntegerLiteral | floatLiteral
;

floatLiteral
: FloatLiteral | NaN
;

nullArg: NullLiteral DoubleColon datatype;

intArg: IntegerLiteral DoubleColon (I8 | I16 | I32 | I64);

floatArg: numericLiteral DoubleColon (FP32 | FP64);

decimalArg
: numericLiteral DoubleColon decimalType
;

booleanArg
: BooleanLiteral DoubleColon Bool
;

stringArg
: StringLiteral DoubleColon Str
;

dateArg
: DateLiteral DoubleColon Date
;

timeArg
: TimeLiteral DoubleColon Time
;

timestampArg
: TimestampLiteral DoubleColon Ts
;

timestampTzArg
: TimestampTzLiteral DoubleColon TsTZ
;

intervalYearArg
: IntervalYearLiteral DoubleColon IYear
;

intervalDayArg
: IntervalDayLiteral DoubleColon IDay
;

// Parser-level decomposition of a year/month interval body: either
// P<years>Y with an optional <months>M, or P<months>M alone.
// NOTE(review): intervalYearArg above consumes the IntervalYearLiteral lexer
// token, and no rule visible in this grammar references this parser rule --
// confirm whether it is still needed.
intervalYearLiteral
: PeriodPrefix (years=IntegerLiteral YearPrefix) (months=IntegerLiteral MSuffix)?
| PeriodPrefix (months=IntegerLiteral MSuffix)
;

// Parser-level decomposition of a day/time interval body: either
// P<days>D with an optional T<timeInterval>, or PT<timeInterval> alone.
// NOTE(review): intervalDayArg above consumes the IntervalDayLiteral lexer
// token, and no rule visible in this grammar references this parser rule --
// confirm whether it is still needed.
intervalDayLiteral
: PeriodPrefix (days=IntegerLiteral DaySuffix) (TimePrefix timeInterval)?
| PeriodPrefix TimePrefix timeInterval
;

// Time portion of a day interval. Components appear in the fixed order
// hours (H), minutes (M), seconds (S), fractional seconds (F); each
// alternative starts at a different leading component and every later
// component is optional.
timeInterval
: hours=IntegerLiteral HourSuffix (minutes=IntegerLiteral MSuffix)? (seconds=IntegerLiteral SecondSuffix)?
(fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| minutes=IntegerLiteral MSuffix (seconds=IntegerLiteral SecondSuffix)? (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| seconds=IntegerLiteral SecondSuffix (fractionalSeconds=IntegerLiteral FractionalSecondSuffix)?
| fractionalSeconds=IntegerLiteral FractionalSecondSuffix
;

datatype
: scalarType
| parameterizedType
;

scalarType
: Bool #Boolean
| I8 #i8
| I16 #i16
| I32 #i32
| I64 #i64
| FP32 #fp32
| FP64 #fp64
| Str #string
| Binary #binary
| Ts #timestamp
| TsTZ #timestampTz
| Date #date
| Time #time
| IDay #intervalDay
| IYear #intervalYear
| UUID #uuid
| UserDefined Identifier #userDefined
;

fixedCharType
: FChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedChar
;

varCharType
: VChar isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #varChar
;

fixedBinaryType
: FBin isnull=QMark? OAngleBracket len=numericParameter CAngleBracket #fixedBinary
;

decimalType
: Dec isnull=QMark? (OAngleBracket precision=numericParameter Comma scale=numericParameter CAngleBracket)? #decimal
;

precisionTimestampType
: PTs isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestamp
;

precisionTimestampTZType
: PTsTZ isnull=QMark? OAngleBracket precision=numericParameter CAngleBracket #precisionTimestampTZ
;

parameterizedType
: fixedCharType
| varCharType
| fixedBinaryType
| decimalType
| precisionTimestampType
| precisionTimestampTZType
// TODO implement the rest of the parameterized types
// | Struct isnull='?'? Lt expr (Comma expr)* Gt #struct
// | NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
// | List isnull='?'? Lt expr Gt #list
// | Map isnull='?'? Lt key=expr Comma value=expr Gt #map
;

numericParameter
: IntegerLiteral #integerLiteral
;

substraitError
: ErrorResult | UndefineResult
;

func_option
: option_name Colon option_value
;

option_name
: Overflow | Rounding
| Identifier
;

option_value
: Error | Saturate | Silent | TieToEven | NaN
;

func_options
: func_option (Comma func_option)*
;
15 changes: 15 additions & 0 deletions grammar/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Generates the Python3 ANTLR parsers (with visitors) from the grammars in
# this directory. Requires `java` on PATH; ANTLR_JAR is resolved relative to
# the directory make is run from. Any variable can be overridden on the
# command line, e.g. `make ANTLR_JAR=/path/to/antlr-complete.jar`.
ANTLR_JAR=antlr-4.13.2-complete.jar
TYPE_GRAMMAR=SubstraitLexer.g4 SubstraitType.g4
TYPE_OUTPUT_DIR=../tests/type/antlr_parser
TESTCASE_GRAMMAR=FuncTestCaseLexer.g4 FuncTestCaseParser.g4
TESTCASE_OUTPUT_DIR=../tests/coverage/antlr_parser

# These targets are commands, not files. Without this declaration a stray
# file named e.g. `clean` would make the target look up to date and skip it.
.PHONY: generate_testcase_parser generate_type_parser clean

# Default goal (first real target): regenerate the test-case parser.
generate_testcase_parser:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TESTCASE_OUTPUT_DIR) $(TESTCASE_GRAMMAR)

# Regenerate the Substrait type parser.
generate_type_parser:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(TYPE_OUTPUT_DIR) $(TYPE_GRAMMAR)

# Remove generated artifacts (*.py, *.tokens, *.interp) from both output
# directories; the directories themselves are left in place.
clean:
	rm -rf $(TYPE_OUTPUT_DIR)/*.py $(TYPE_OUTPUT_DIR)/*.tokens $(TYPE_OUTPUT_DIR)/*.interp
	rm -rf $(TESTCASE_OUTPUT_DIR)/*.py $(TESTCASE_OUTPUT_DIR)/*.tokens $(TESTCASE_OUTPUT_DIR)/*.interp
Empty file added tests/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions tests/cases/arithmetic/add.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
### SUBSTRAIT_SCALAR_TEST: v1.0
### SUBSTRAIT_INCLUDE: '/extensions/functions_arithmetic.yaml'

# basic: Basic examples without any special cases
add(120::i8, 5::i8) = 125::i8
add(100::i16, 100::i16) = 200::i16
add(30000::i32, 30000::i32) = 60000::i32
add(2000000000::i64, 2000000000::i64) = 4000000000::i64

# overflow: Examples demonstrating overflow behavior
add(120::i8, 10::i8) [overflow:ERROR] = <!ERROR>
add(30000::i16, 30000::i16) [overflow:ERROR] = <!ERROR>
add(2000000000::i32, 2000000000::i32) [overflow:ERROR] = <!ERROR>
add(9223372036854775807::i64, 1::i64) [overflow:ERROR] = <!ERROR>

# overflow: Examples demonstrating overflow behavior tests: overflow with SATURATE
add(120::i8, 10::i8) [overflow:SATURATE] = 127::i8
add(-120::i8, -10::i8) [overflow:SATURATE] = -128::i8

# overflow: Examples demonstrating overflow behavior tests: overflow with SILENT
add(120::i8, 10::i8) [overflow:SILENT] = <!UNDEFINED>

# floating_exception: Examples demonstrating exceptional floating point cases
add(1.5e+308::fp64, 1.5e+308::fp64) = inf::fp64
add(-1.5e+308::fp64, -1.5e+308::fp64) = -inf::fp64

# rounding: Examples demonstrating floating point rounding behavior
add(4.5::fp32, 2.500001::fp32) [rounding:TIE_TO_EVEN] = 7.000001::fp32

# types: Examples demonstrating behavior of different data types
add(4.5::fp64, 2.5000007152557373::fp64) = 7.00000071525573::fp64
Loading
Loading