Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add LET and DECLARE statements parsing in Snowflake PL/SQL procedures #548

Merged
merged 44 commits into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
f96e914
Grammar for Declare and Let variable assignment
sundarshankar89 Jul 11, 2024
74868f2
refactor multable variable declartion
sundarshankar89 Jul 11, 2024
1340e75
improved let grammar, removed visitdeclare
sundarshankar89 Jul 11, 2024
9ab919c
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 12, 2024
1e58d6c
Added Test for SetVariable
sundarshankar89 Jul 12, 2024
d5c2bd4
Added parser for basic procedure
sundarshankar89 Jul 14, 2024
65735c1
Added Implementation for basic sql procedure
sundarshankar89 Jul 15, 2024
4aeca8d
intermediate changes
sundarshankar89 Jul 15, 2024
2d177b5
intermediate changes
sundarshankar89 Jul 16, 2024
46c4c37
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 17, 2024
a07b3e3
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 17, 2024
2b4a746
fmt fixes
sundarshankar89 Jul 18, 2024
0621eb7
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 18, 2024
1948da3
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 22, 2024
dce626c
fmt fixes
sundarshankar89 Jul 22, 2024
ebfabba
fmt fixes
sundarshankar89 Jul 23, 2024
1a8b140
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 23, 2024
9b95869
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 23, 2024
51af95e
Intermediate commits
sundarshankar89 Jul 24, 2024
1753b53
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 24, 2024
a6244ea
Intermediate commits
sundarshankar89 Jul 24, 2024
7ee5a87
Intermediate commits
sundarshankar89 Jul 25, 2024
7681382
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 26, 2024
32aa586
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 26, 2024
30152f0
Mapper ExpressionLogicalPlan
sundarshankar89 Jul 26, 2024
133c301
New Implementation
sundarshankar89 Jul 29, 2024
0aae5d9
Review Comments and fmt fixes
sundarshankar89 Jul 29, 2024
0068f70
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 29, 2024
7c44ef3
Refactor
sundarshankar89 Jul 29, 2024
e26a13f
Refactor
sundarshankar89 Jul 29, 2024
f2d1302
fmt fixes
sundarshankar89 Jul 29, 2024
b66264b
fmt fixes
sundarshankar89 Jul 29, 2024
6208247
fmt fixes
sundarshankar89 Jul 29, 2024
8ebd978
fmt fixes
sundarshankar89 Jul 29, 2024
8e89334
Fixed
sundarshankar89 Jul 30, 2024
7de63df
Addressed Review Comments and added further Let and Declare Definitions
sundarshankar89 Jul 30, 2024
bb410ee
Removed RESULTSET from datatype
sundarshankar89 Jul 30, 2024
ac19a2a
Added more grammar for Declare and Let
sundarshankar89 Jul 30, 2024
506ae5c
Added ScalarSubquery
sundarshankar89 Jul 30, 2024
be8ad4d
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Jul 30, 2024
e5c41b1
Revisited ResultSet implementation.
sundarshankar89 Jul 31, 2024
bb62ee2
Merge branch 'main' into feature/assignment_syntax
sundarshankar89 Aug 1, 2024
d52db4e
fixes
sundarshankar89 Aug 2, 2024
37d87c9
fixes
sundarshankar89 Aug 2, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ DATE_FORMAT : 'DATE_FORMAT';
DATE_INPUT_FORMAT : 'DATE_INPUT_FORMAT';
DATE_OUTPUT_FORMAT : 'DATE_OUTPUT_FORMAT';
DAYS_TO_EXPIRY : 'DAYS_TO_EXPIRY';
DECLARE : 'DECLARE';
DEFAULT : 'DEFAULT';
DEFAULT_DDL_COLLATION_ : 'DEFAULT_DDL_COLLATION';
DEFAULT_NAMESPACE : 'DEFAULT_NAMESPACE';
Expand Down Expand Up @@ -392,6 +393,7 @@ LAST : 'LAST';
LAST_NAME : 'LAST_NAME';
LAST_QUERY_ID : 'LAST_QUERY_ID';
LATERAL : 'LATERAL';
LET : 'LET';
LEAD : 'LEAD';
LEFT : 'LEFT';
LENGTH : 'LENGTH';
Expand Down Expand Up @@ -591,6 +593,7 @@ RESTRICT : 'RESTRICT';
RESTRICTIONS : 'RESTRICTIONS';
RESULT : 'RESULT';
RESUME : 'RESUME';
RETURN : 'RETURN';
RETURNS : 'RETURNS';
RETURN_ALL_ERRORS : 'RETURN_ALL_ERRORS';
RETURN_ERRORS : 'RETURN_ERRORS';
Expand Down Expand Up @@ -845,6 +848,7 @@ NUMERIC : 'NUMERIC';
NVARCHAR2 : 'NVARCHAR2';
NVARCHAR : 'NVARCHAR';
REAL_ : 'REAL';
RESULTSET : 'RESULTSET';
SMALLINT : 'SMALLINT';
STRING_ : 'STRING';
TEXT : 'TEXT';
Expand Down Expand Up @@ -907,6 +911,7 @@ GE : '>=';
LT : '<';
LE : '<=';

ASSIGN : ':=';
PIPE_PIPE : '||';
DOT : '.';
AT : '@';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,10 @@ otherCommand
| truncateMaterializedView
| truncateTable
| unset
| call
| beginTxn
| scriptingStatement
;

scriptingStatement: call | declareCommand | let | returnStatement | beginTxn
;

beginTxn: BEGIN (WORK | TRANSACTION)? (NAME id)? | START TRANSACTION ( NAME id)?
Expand All @@ -162,6 +164,15 @@ copyIntoTable
) R_PAREN files? pattern? fileFormat? copyOptions*
;

// DECLARE section: the DECLARE keyword followed by one or more variable declarations.
declareCommand: DECLARE declareStatement+
;

// A single variable declaration, always terminated by SEMI. Three forms are accepted:
//   1. name and data type only;
//   2. name, data type and a DEFAULT given as a parenthesised selectStatement;
//   3. name, data type and a DEFAULT given as an expr.
// NOTE(review): SnowflakeCommandBuilder.visitDeclareStatement only reads ctx.expr(), so a
// default matched through form 2 appears not to be carried into the IR - confirm.
declareStatement
: id dataType SEMI
| id dataType DEFAULT L_PAREN selectStatement R_PAREN SEMI
| id dataType DEFAULT expr SEMI
;

externalLocation
: STRING
//(for Amazon S3)
Expand Down Expand Up @@ -199,6 +210,14 @@ formatName: FORMAT_NAME EQ string
formatType: TYPE EQ typeFileformat formatTypeOptions*
;

// Variable assignment. Both alternatives are identical except for the leading LET keyword:
// a target id (labelled `key`), an optional dataType, either ASSIGN (':=') or DEFAULT,
// the assigned expr (labelled `value`), and a terminating SEMI.
let
: LET key = id dataType? (ASSIGN | DEFAULT) value = expr SEMI
| key = id dataType? (ASSIGN | DEFAULT) value = expr SEMI
;

// RETURN followed by a single expr and a terminating SEMI.
returnStatement: RETURN expr SEMI
;

stageFileFormat
: STAGE_FILE_FORMAT EQ L_PAREN FORMAT_NAME EQ string
| TYPE EQ typeFileformat formatTypeOptions+ R_PAREN
Expand Down Expand Up @@ -1601,16 +1620,22 @@ callerOwner: CALLER | OWNER
executaAs: EXECUTE AS callerOwner
;

procedureDefinition: string | DBL_DOLLAR
// Body of a BEGIN ... END block: zero or more let, call, executeImmediate or string items,
// in any order.
procedureBody: (let | call | executeImmediate | string)*
;

// A procedure definition is either a single DBL_DOLLAR token, or an inline block made of an
// optional declareCommand, BEGIN, the procedureBody, a mandatory returnStatement, END and SEMI.
procedureDefinition: DBL_DOLLAR | declareCommand? BEGIN procedureBody returnStatement END SEMI
;

// The two-token sequence NOT NULL_.
notNull: NOT NULL_
;

// Table-shaped return type: TABLE followed by a parenthesised, possibly empty, comma-separated
// list of colDecl - or, as the second alternative, a functionCall.
// NOTE(review): the top-level '|' splits the whole rule, so the second alternative is a bare
// functionCall without the TABLE keyword. Confirm this is intended rather than
// TABLE ( ... | functionCall ).
table_: TABLE (L_PAREN (colDecl (COMMA colDecl)*)? R_PAREN) | (functionCall)
;

createProcedure
: CREATE orReplace? PROCEDURE objectName L_PAREN (argDecl (COMMA argDecl)*)? R_PAREN RETURNS (
dataType
| TABLE L_PAREN (colDecl (COMMA colDecl)*)? R_PAREN
| table_
) notNull? LANGUAGE SQL (CALLED ON NULL_ INPUT | RETURNS NULL_ ON NULL_ INPUT | STRICT)? (
VOLATILE
| IMMUTABLE
Expand Down Expand Up @@ -2228,7 +2253,7 @@ taskOverlap: ALLOW_OVERLAPPING_EXECUTION EQ trueFalse
sql: EXECUTE IMMEDIATE DBL_DOLLAR | sqlCommand | call
;

call: CALL objectName L_PAREN exprList? R_PAREN
call: CALL objectName L_PAREN exprList? R_PAREN SEMI
;

createUser: CREATE orReplace? USER ifNotExists? id objectProperties? objectParams? sessionParams?
Expand Down Expand Up @@ -3133,6 +3158,7 @@ expr
| DISTINCT expr # exprDistinct
//Should be latest rule as it's nearly a catch all
| primitiveExpression # exprPrimitive
| parameterExpression # exprParameter
;

withinGroup: WITHIN GROUP L_PAREN orderByClause R_PAREN
Expand Down Expand Up @@ -3199,6 +3225,7 @@ dataType
| ARRAY
| GEOGRAPHY
| GEOMETRY
| RESULTSET
;

primitiveExpression
Expand All @@ -3210,8 +3237,11 @@ primitiveExpression
| BOTH_Q # primExprBoth
| ARRAY_Q # primExprArray
| OBJECT_Q # primExprObject
| COLON id # primVariable
;

parameterExpression: COLON id
;
overClause: OVER L_PAREN (PARTITION BY expr (COMMA expr)*)? windowOrderingAndFrame? R_PAREN
;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,3 +87,10 @@ case class WriteStreamOperationStart(
foreach_batch: Option[StreamingForeachFunction])
extends LeafNode
with Command

/**
 * IR command describing the declaration of a variable.
 *
 * @param name        variable name
 * @param dataType    declared data type of the variable
 * @param defaultExpr optional expression supplying the default value; None when no default is given
 * @param replace     NOTE(review): presumably whether an existing variable may be replaced - confirm
 */
case class CreateVariable(name: String, dataType: DataType, defaultExpr: Option[Expression], replace: Boolean)
extends LeafNode
with Command

/**
 * IR command describing the assignment of a value to a variable.
 *
 * @param name     variable name
 * @param dataType optional data type given alongside the assignment
 * @param value    expression whose result is assigned
 */
case class SetVariable(name: String, dataType: Option[DataType], value: Expression) extends LeafNode with Command

Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,18 @@ case class Batch(children: Seq[LogicalPlan]) extends LogicalPlan {

case class FunctionParameter(name: String, dataType: DataType, defaultValue: Option[Expression])

sealed trait UDFRuntimeInfo
case class JavaUDFInfo(runtimeVersion: Option[String], imports: Seq[String], handler: String) extends UDFRuntimeInfo
case class PythonUDFInfo(runtimeVersion: Option[String], packages: Seq[String], handler: String) extends UDFRuntimeInfo
case object JavascriptUDFInfo extends UDFRuntimeInfo
case class ScalaUDFInfo(runtimeVersion: Option[String], imports: Seq[String], handler: String) extends UDFRuntimeInfo
case class SQLUDFInfo(memoizable: Boolean) extends UDFRuntimeInfo
sealed trait RuntimeInfo
case class JavaRuntimeInfo(runtimeVersion: Option[String], imports: Seq[String], handler: String) extends RuntimeInfo
case class PythonRuntimeInfo(runtimeVersion: Option[String], packages: Seq[String], handler: String) extends RuntimeInfo
case object JavaScriptRuntimeInfo extends RuntimeInfo
case class ScalaRuntimeInfo(runtimeVersion: Option[String], imports: Seq[String], handler: String) extends RuntimeInfo
case class SQLRuntimeInfo(memoizable: Boolean) extends RuntimeInfo

case class CreateInlineUDF(
name: String,
returnType: DataType,
parameters: Seq[FunctionParameter],
runtimeInfo: UDFRuntimeInfo,
runtimeInfo: RuntimeInfo,
acceptsNullParameters: Boolean,
comment: Option[String],
body: String)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package com.databricks.labs.remorph.parsers.snowflake

import com.databricks.labs.remorph.parsers.snowflake.SnowflakeParser._
import com.databricks.labs.remorph.parsers.{IncompleteParser, ParserCommon, intermediate => ir}

/**
 * Parse-tree visitor that converts Snowflake scripting statements (variable declarations and
 * assignments) into `ir.Command` nodes.
 */
class SnowflakeCommandBuilder
    extends SnowflakeParserBaseVisitor[ir.Command]
    with ParserCommon[ir.Command]
    with IncompleteParser[ir.Command] {

  // Delegate used to translate `expr` sub-trees into IR expressions.
  private val expressionBuilder = new SnowflakeExpressionBuilder

  /** Wraps input that could not be translated into an `ir.UnresolvedCommand`. */
  protected override def wrapUnresolvedInput(unparsedInput: String): ir.UnresolvedCommand =
    ir.UnresolvedCommand(unparsedInput)

  /**
   * Translates a single `declareStatement` into an `ir.CreateVariable`.
   *
   * The default expression is taken from the `expr` child when present and is `None` otherwise;
   * `replace` is always `false`.
   *
   * NOTE(review): the grammar also allows `DEFAULT ( selectStatement )`; that alternative has no
   * `expr` child, so its default appears to be dropped here - confirm whether this is intended.
   */
  override def visitDeclareStatement(ctx: DeclareStatementContext): ir.Command = {
    val variableName = ctx.id().getText
    val dataType = DataTypeBuilder.buildDataType(ctx.dataType())
    // Option(...) maps the null that ANTLR returns for an absent child to None.
    val defaultExpr = Option(ctx.expr()).map(_.accept(expressionBuilder))
    ir.CreateVariable(variableName, dataType, defaultExpr, replace = false)
  }

  /**
   * Translates a `let` rule (with or without the LET keyword) into an `ir.SetVariable`.
   *
   * The data type is optional in the grammar and is therefore carried as an `Option`.
   */
  override def visitLet(ctx: LetContext): ir.Command = {
    val variableName = ctx.id().getText
    val variableDataType = Option(ctx.dataType()).map(dt => DataTypeBuilder.buildDataType(dt))
    val variableValue = ctx.expr().accept(expressionBuilder)

    ir.SetVariable(variableName, variableDataType, variableValue)
  }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package com.databricks.labs.remorph.parsers.snowflake

import com.databricks.labs.remorph.parsers.intermediate.AddColumn
import com.databricks.labs.remorph.parsers.snowflake.SnowflakeParser.{StringContext => StrContext, _}
import com.databricks.labs.remorph.parsers.{IncompleteParser, ParserCommon, intermediate => ir}

Expand All @@ -22,9 +21,9 @@ class SnowflakeDDLBuilder
val runtimeInfo = ctx match {
case c if c.JAVA() != null => buildJavaUDF(c)
case c if c.PYTHON() != null => buildPythonUDF(c)
case c if c.JAVASCRIPT() != null => ir.JavascriptUDFInfo
case c if c.JAVASCRIPT() != null => ir.JavaScriptRuntimeInfo
case c if c.SCALA() != null => buildScalaUDF(c)
case c if c.SQL() != null || c.LANGUAGE() == null => ir.SQLUDFInfo(c.MEMOIZABLE() != null)
case c if c.SQL() != null || c.LANGUAGE() == null => ir.SQLRuntimeInfo(c.MEMOIZABLE() != null)
}
val name = ctx.objectName().getText
val returnType = DataTypeBuilder.buildDataType(ctx.dataType())
Expand All @@ -48,11 +47,11 @@ class SnowflakeDDLBuilder
case c if c.string() != null => extractString(c.string())
}).trim

private def buildJavaUDF(ctx: CreateFunctionContext): ir.UDFRuntimeInfo = buildJVMUDF(ctx)(ir.JavaUDFInfo.apply)
private def buildScalaUDF(ctx: CreateFunctionContext): ir.UDFRuntimeInfo = buildJVMUDF(ctx)(ir.ScalaUDFInfo.apply)
private def buildJavaUDF(ctx: CreateFunctionContext): ir.RuntimeInfo = buildJVMUDF(ctx)(ir.JavaRuntimeInfo.apply)
private def buildScalaUDF(ctx: CreateFunctionContext): ir.RuntimeInfo = buildJVMUDF(ctx)(ir.ScalaRuntimeInfo.apply)

private def buildJVMUDF(ctx: CreateFunctionContext)(
ctr: (Option[String], Seq[String], String) => ir.UDFRuntimeInfo): ir.UDFRuntimeInfo = {
ctr: (Option[String], Seq[String], String) => ir.RuntimeInfo): ir.RuntimeInfo = {
val imports =
ctx
.stringList()
Expand All @@ -69,15 +68,15 @@ class SnowflakeDDLBuilder
private def extractHandler(ctx: CreateFunctionContext): String =
Option(ctx.HANDLER()).flatMap(h => ctx.string().asScala.find(occursBefore(h, _))).map(extractString).get

private def buildPythonUDF(ctx: CreateFunctionContext): ir.PythonUDFInfo = {
private def buildPythonUDF(ctx: CreateFunctionContext): ir.PythonRuntimeInfo = {
val packages =
ctx
.stringList()
.asScala
.find(occursBefore(ctx.PACKAGES(0), _))
.map(_.string().asScala.map(extractString))
.getOrElse(Seq())
ir.PythonUDFInfo(extractRuntimeVersion(ctx), packages, extractHandler(ctx))
ir.PythonRuntimeInfo(extractRuntimeVersion(ctx), packages, extractHandler(ctx))
}

override def visitCreateTable(ctx: CreateTableContext): ir.Catalog = {
Expand Down Expand Up @@ -168,7 +167,7 @@ class SnowflakeDDLBuilder

private[snowflake] def buildColumnActions(ctx: TableColumnActionContext): Seq[ir.TableAlteration] = ctx match {
case c if c.ADD() != null =>
c.fullColDecl().asScala.map(buildColumnDeclaration).map(AddColumn.apply)
c.fullColDecl().asScala.map(buildColumnDeclaration).map(ir.AddColumn.apply)
case c if !c.alterColumnClause().isEmpty =>
c.alterColumnClause().asScala.map(buildColumnAlterations)
case c if c.DROP() != null =>
Expand Down Expand Up @@ -219,4 +218,5 @@ class SnowflakeDDLBuilder
affectedColumns.map(col => ir.DropConstraint(Some(col), constraint))
}
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package com.databricks.labs.remorph.parsers.snowflake

import com.databricks.labs.remorph.parsers.intermediate._
import org.scalatest.matchers.should.Matchers
import org.scalatest.wordspec.AnyWordSpec
import org.scalatestplus.mockito.MockitoSugar

/**
 * Checks that `SnowflakeCommandBuilder` turns `declareStatement` and `let` parse trees into the
 * expected `CreateVariable` / `SetVariable` IR nodes.
 */
class SnowflakeCommandBuilderSpec
    extends AnyWordSpec
    with SnowflakeParserTestCommon
    with Matchers
    with MockitoSugar
    with IRHelpers {

  override protected def astBuilder: SnowflakeCommandBuilder = new SnowflakeCommandBuilder

  "translate Declare to CreateVariable Expression" in {
    // Declaration that carries a DEFAULT literal.
    val numberWithDefault = CreateVariable(
      name = "x",
      dataType = DecimalType(None, None),
      defaultExpr = Some(Literal(short = Some(0))),
      replace = false)
    example("x number default 0;", _.declareStatement(), numberWithDefault)

    // Declaration without any default.
    val varcharWithoutDefault =
      CreateVariable(name = "select_statement", dataType = VarCharType(None), defaultExpr = None, replace = false)
    example("select_statement varchar;", _.declareStatement(), varcharWithoutDefault)
  }

  "translate Let to SetVariable expressions" in {
    // LET keyword, no data type, ':=' assignment.
    val simpleLet = SetVariable(name = "X", dataType = None, value = Literal(short = Some(1)))
    example("LET X := 1;", _.let(), simpleLet)

    // Assignment without the LET keyword, value built from a concatenation.
    val concatenated = Concat(Seq(Literal(string = Some("select * from table where id = ")), Id("id")))
    val bareAssignment = SetVariable(name = "select_statement", dataType = None, value = concatenated)
    example("select_statement := 'select * from table where id = ' || id;", _.let(), bareAssignment)

    // Both typed variants share the same declared data type.
    val priceType = Some(DecimalType(Some(13), Some(2)))

    // LET with a data type and the DEFAULT keyword.
    val typedDefault = SetVariable(name = "price", dataType = priceType, value = Literal(float = Some(111.5f)))
    example("let price number(13,2) default 111.50;", _.let(), typedDefault)

    // LET with a data type and ':='.
    val typedAssign = SetVariable(name = "price", dataType = priceType, value = Literal(float = Some(121.55f)))
    example("let price number(13,2) := 121.55;", _.let(), typedAssign)
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class SnowflakeDDLBuilderSpec
name = "echo_varchar",
returnType = VarCharType(None),
parameters = Seq(FunctionParameter("x", VarCharType(None), None)),
JavaUDFInfo(
JavaRuntimeInfo(
runtimeVersion = None,
imports = Seq("@~/some-dir/some-lib.jar"),
handler = "TestFunc.echoVarchar"),
Expand Down Expand Up @@ -73,7 +73,7 @@ class SnowflakeDDLBuilderSpec
name = "py_udf",
returnType = UnparsedType(),
parameters = Seq(),
runtimeInfo = PythonUDFInfo(
runtimeInfo = PythonRuntimeInfo(
runtimeVersion = Some("3.8"),
packages = Seq("numpy", "pandas", "xgboost==1.5.0"),
handler = "udf"),
Expand Down Expand Up @@ -105,7 +105,7 @@ class SnowflakeDDLBuilderSpec
name = "js_factorial",
returnType = DoubleType,
parameters = Seq(FunctionParameter("d", DoubleType, None)),
runtimeInfo = JavascriptUDFInfo,
runtimeInfo = JavaScriptRuntimeInfo,
acceptsNullParameters = false,
comment = Some("Compute factorial using JavaScript"),
body = javascriptCode))
Expand Down Expand Up @@ -133,7 +133,7 @@ class SnowflakeDDLBuilderSpec
name = "echo_varchar",
returnType = VarCharType(None),
parameters = Seq(FunctionParameter("x", VarCharType(None), Some(Literal(string = Some("foo"))))),
runtimeInfo = ScalaUDFInfo(runtimeVersion = Some("2.12"), imports = Seq(), handler = "Echo.echoVarchar"),
runtimeInfo = ScalaRuntimeInfo(runtimeVersion = Some("2.12"), imports = Seq(), handler = "Echo.echoVarchar"),
acceptsNullParameters = true,
comment = None,
body = scalaCode))
Expand All @@ -151,7 +151,7 @@ class SnowflakeDDLBuilderSpec
parameters = Seq(
FunctionParameter("a", DecimalType(None, None), None),
FunctionParameter("b", DecimalType(None, None), None)),
runtimeInfo = SQLUDFInfo(memoizable = false),
runtimeInfo = SQLRuntimeInfo(memoizable = false),
acceptsNullParameters = false,
comment = Some("multiply two numbers"),
body = "a * b"))
Expand Down
Loading