-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement TSQL specific function call mapper (#765)
Here, we implement the function call mapper (Rule) that allows us to make complex transformations from TSQL specific functions into Databricks compatible constructs. Transformations are sometimes argument reordering, and sometimes replacement with a different expression. In this PR we use the transformation of TSQL `DATEADD` to Databricks SQL `DATE_ADD`, `ADD_MONTHS` and `xxx + INTERVAL n {days|months|etc}`. For example, the TSQL:

```tsql
SELECT DATEADD(hh, 7, col1) AS add_hours_col1 FROM tabl;
```

Translates to:

```sql
SELECT col1 + INTERVAL 7 HOUR AS add_hours_col1 FROM tabl;
```
- Loading branch information
Showing
19 changed files
with
238 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
83 changes: 83 additions & 0 deletions
83
core/src/main/scala/com/databricks/labs/remorph/parsers/tsql/rules/TSqlCallMapper.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
package com.databricks.labs.remorph.parsers.tsql.rules | ||
|
||
import com.databricks.labs.remorph.parsers.intermediate._ | ||
|
||
/**
 * Call mapper for T-SQL specific functions.
 *
 * Rewrites calls that need more than a simple rename (currently only `DATEADD`) and hands every
 * other call to the parent `CallMapper`.
 */
class TSqlCallMapper extends CallMapper {

  /**
   * Converts a T-SQL function call into the expression that should be generated for it.
   *
   * @param call the function call to convert
   * @return the rewritten expression for `DATEADD`, otherwise the result of the parent mapper
   * @throws IllegalArgumentException if a `DATEADD` call has too few arguments or an invalid interval type
   */
  override def convert(call: Fn): Expression =
    call match {
      case CallFunction("DATEADD", args) => processDateAdd(args)
      // Everything else is delegated, including any Fn that is not a CallFunction, so that an
      // unexpected node type cannot surface as a MatchError here.
      case _ => super.convert(call)
    }

  /**
   * Translates the arguments of T-SQL `DATEADD(datepart, number, date)`.
   *
   * @param args the call arguments in T-SQL order: interval type, amount to add, value to add it to
   * @return a `DateAdd`, `AddMonths` or `Add` of a `KnownInterval`, depending on the interval type
   * @throws IllegalArgumentException if fewer than three arguments are supplied, if the interval
   *                                  type is not a column-style identifier, or if it is not recognized
   */
  private def processDateAdd(args: Seq[Expression]): Expression =
    args match {

      // The first argument of the TSQL DATEADD function is the interval type, which is one of way too
      // many strings and aliases for "day", "month", "year", etc. It arrives as a Column reference.
      // The second is how many units to add and the third is the thing we add them to.
      case Seq(Column(_, id), value, objectReference, _*) =>
        // Locale.ROOT keeps the lookup independent of the JVM default locale (e.g. the Turkish
        // dotless i would otherwise turn "MINUTE" into a string that matches nothing below).
        val interval = id.id.toLowerCase(java.util.Locale.ROOT)
        translateDateAdd(interval, value, objectReference)

      case _ if args.size < 3 =>
        throw new IllegalArgumentException(
          s"DATEADD requires 3 arguments (interval type, number, date), but ${args.size} supplied.")

      case _ =>
        throw new IllegalArgumentException("DATEADD interval type is not valid. Should be 'day', 'month', 'year', etc.")
    }

  /**
   * Builds the replacement expression for a `DATEADD` whose interval type has been normalized.
   *
   * @param interval        the lower-cased T-SQL interval type or one of its aliases
   * @param value           how many units of `interval` to add
   * @param objectReference the expression the units are added to
   */
  private def translateDateAdd(interval: String, value: Expression, objectReference: Expression): Expression =
    // The interval type names are all over the place in TSQL, some of them having names that
    // belie their actual function.
    interval match {

      // Days are all that DateAdd operates on, with the arguments transposed from TSQL. Despite
      // the name, 'dayofyear' behaves exactly like 'day' here, as does 'weekday'.
      case "day" | "dayofyear" | "dd" | "d" | "dy" | "y" | "weekday" | "dw" | "w" =>
        DateAdd(objectReference, value)

      // Months map onto AddMonths, with the arguments transposed from TSQL
      case "month" | "mm" | "m" => AddMonths(objectReference, value)

      // Quarters are expressed as three months each
      case "quarter" | "qq" | "q" => AddMonths(objectReference, Multiply(value, Literal(3)))

      // Years are expressed as twelve months each
      case "year" | "yyyy" | "yy" => AddMonths(objectReference, Multiply(value, Literal(12)))

      // Weeks are expressed as seven days each
      case "week" | "wk" | "ww" => DateAdd(objectReference, Multiply(value, Literal(7)))

      // Sub-day units have no dedicated function, so each becomes an INTERVAL of the matching
      // unit that is added to the object reference.
      case "hour" | "hh" => Add(objectReference, KnownInterval(value, HOUR_INTERVAL))
      case "minute" | "mi" | "n" => Add(objectReference, KnownInterval(value, MINUTE_INTERVAL))
      case "second" | "ss" | "s" => Add(objectReference, KnownInterval(value, SECOND_INTERVAL))
      case "millisecond" | "ms" => Add(objectReference, KnownInterval(value, MILLISECOND_INTERVAL))
      case "microsecond" | "mcs" => Add(objectReference, KnownInterval(value, MICROSECOND_INTERVAL))
      case "nanosecond" | "ns" => Add(objectReference, KnownInterval(value, NANOSECOND_INTERVAL))

      case _ =>
        throw new IllegalArgumentException(
          s"DATEADD interval type '${interval}' is not valid. Should be 'day', 'month', 'year', etc.")
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
96 changes: 96 additions & 0 deletions
96
...src/test/scala/com/databricks/labs/remorph/parsers/tsql/TSqlExpressionGeneratorTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
package com.databricks.labs.remorph.parsers.tsql | ||
|
||
import com.databricks.labs.remorph.generators.sql.{ExpressionGenerator, GeneratorTestCommon} | ||
import com.databricks.labs.remorph.parsers.intermediate.IRHelpers | ||
import com.databricks.labs.remorph.parsers.tsql.rules.TSqlCallMapper | ||
import com.databricks.labs.remorph.parsers.{intermediate => ir} | ||
import org.scalatest.wordspec.AnyWordSpec | ||
import org.scalatestplus.mockito.MockitoSugar | ||
|
||
// Only add tests here that require the TSqlCallMapper, or in the future any other transformer/rule | ||
// that is specific to T-SQL. Otherwise they belong in ExpressionGeneratorTest. | ||
|
||
class TSqlExpressionGeneratorTest
    extends AnyWordSpec
    with GeneratorTestCommon[ir.Expression]
    with MockitoSugar
    with IRHelpers {

  override protected val generator = new ExpressionGenerator(new TSqlCallMapper)

  // Builds the IR for the T-SQL call DATEADD(<datePart>, 42, col1), which is the only shape
  // these tests need; only the interval type varies between cases.
  private def dateAdd(datePart: String): ir.Expression = {
    val arguments = Seq(simplyNamedColumn(datePart), ir.Literal(42.toShort), simplyNamedColumn("col1"))
    ir.CallFunction("DATEADD", arguments)
  }

  "DATEADD" should {
    "transpile to DATE_ADD" in {
      // Day-based units go straight to DATE_ADD, weeks are scaled to days
      dateAdd("day") generates "DATE_ADD(col1, 42)"
      dateAdd("week") generates "DATE_ADD(col1, 42 * 7)"
    }

    "transpile to ADD_MONTHS" in {
      // Mixed case on purpose: the interval type must be matched case-insensitively
      dateAdd("Month") generates "ADD_MONTHS(col1, 42)"
      dateAdd("qq") generates "ADD_MONTHS(col1, 42 * 3)"
    }

    "transpile to INTERVAL" in {
      // T-SQL interval type (full name or alias) paired with the interval unit expected in the output
      val subDayUnits = Seq(
        "hour" -> "HOUR",
        "minute" -> "MINUTE",
        "second" -> "SECOND",
        "millisecond" -> "MILLISECOND",
        "mcs" -> "MICROSECOND",
        "ns" -> "NANOSECOND")

      subDayUnits.foreach { case (datePart, unit) =>
        dateAdd(datePart) generates s"col1 + INTERVAL 42 $unit"
      }
    }
  }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters