From 0b52b26db1487072081d5dfe1a2573c730d09b02 Mon Sep 17 00:00:00 2001 From: Jordan Vieler Date: Tue, 12 Dec 2023 14:01:23 -0600 Subject: [PATCH 01/13] fix: Renamed modulus to modulo. Updated modulo operator definition Renamed modulus to modulo as modulus is the length of a vector and this function describes calculating the remainder after integer division. Added options and documentation for the modulo operator as it is not consistently defined across mathematics and results are implementation/platform dependent. Refs: #353 --- extensions/functions_arithmetic.yaml | 58 +++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 74a3f990e..bdedf62f6 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -284,32 +284,86 @@ scalar_functions: value: fp64 return: fp64 - - name: "modulus" - description: "Get the remainder when dividing one value by another." + name: "modulo" + description: > + Calculate the remainder (r) when dividing dividend (x) by divisor (y). + + In mathematics, many conventions for the modulo operation exists. In computing the result of a modulo operation depends on the software implementation + and underlying hardware. Substrait is a format for describing compute operations on structured data and designed for + interoperability. Therefore the user is responsible for determining a definition of division as defined by the quotient (y). + + The following basic conditions of division are satisfied: + (1) q ∈ ℤ (the quotient is an integer) + (2) x = y * q + r (division rule) + (3) abs(r) < abs(y) + where q is the quotient. + + The `quotient` option determines the mathematical definition of quotient to use in the above definition of division. + When `quotient`=TRUNCATE, q = trunc(x/y). + When `quotient`=FLOOR, q = floor(x/y). + When `quotient`=CEILING, q = ceil(x/y). 
+ When `quotient`=ROUND, q = round(x/y), where the round function is rounding half to even. + When `quotient`=EUCLIDIAN, q = sign(y) * floor(x/abs(y)) + + In the case of TRUNCATE, FLOOR, AND CEILING division: remainder r = x - round_func(x/y) + In the case of EUCLIDIAN division: remainder r = x - abs(y)*floor(x/abs(y)) + + The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. + Therefore, modulus is undefined. + The `overflow` option governs behavior when integer overflow occurs. + If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`. impls: - args: - name: x value: i8 - name: y value: i8 + options: + quotient: + values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + overflow: + values: [ SILENT, SATURATE, ERROR ] + on_domain_error: + values: [ NAN, ERROR ] return: i8 - args: - name: x value: i16 - name: y value: i16 + options: + quotient: + values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + overflow: + values: [ SILENT, SATURATE, ERROR ] + on_domain_error: + values: [ NAN, ERROR ] return: i16 - args: - name: x value: i32 - name: y value: i32 + options: + quotient: + values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + overflow: + values: [ SILENT, SATURATE, ERROR ] + on_domain_error: + values: [ NAN, ERROR ] return: i32 - args: - name: x value: i64 - name: y value: i64 + options: + quotient: + values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + overflow: + values: [ SILENT, SATURATE, ERROR ] + on_domain_error: + values: [ NAN, ERROR ] return: i64 - name: "power" From 6f5a98670620ac7b6d1bb20f7c5911e36538df57 Mon Sep 17 00:00:00 2001 From: Jordan Vieler Date: Tue, 12 Dec 2023 15:38:37 -0600 Subject: [PATCH 02/13] fix: corrected line length, changed modulus to modulo --- extensions/functions_arithmetic.yaml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 
bdedf62f6..3392afa38 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -288,9 +288,10 @@ scalar_functions: description: > Calculate the remainder (r) when dividing dividend (x) by divisor (y). - In mathematics, many conventions for the modulo operation exists. In computing the result of a modulo operation depends on the software implementation - and underlying hardware. Substrait is a format for describing compute operations on structured data and designed for - interoperability. Therefore the user is responsible for determining a definition of division as defined by the quotient (y). + In mathematics, many conventions for the modulo operation exists. In computing the result of a modulo operation + depends on the software implementation and underlying hardware. Substrait is a format for describing compute + operations on structured data and designed for interoperability. Therefore the user is responsible for determining + a definition of division as defined by the quotient (y). The following basic conditions of division are satisfied: (1) q ∈ ℤ (the quotient is an integer) @@ -298,7 +299,9 @@ scalar_functions: (3) abs(r) < abs(y) where q is the quotient. - The `quotient` option determines the mathematical definition of quotient to use in the above definition of division. + The `quotient` option determines the mathematical definition of quotient to use in the above definition of + division. + When `quotient`=TRUNCATE, q = trunc(x/y). When `quotient`=FLOOR, q = floor(x/y). When `quotient`=CEILING, q = ceil(x/y). @@ -308,8 +311,8 @@ scalar_functions: In the case of TRUNCATE, FLOOR, AND CEILING division: remainder r = x - round_func(x/y) In the case of EUCLIDIAN division: remainder r = x - abs(y)*floor(x/abs(y)) - The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. - Therefore, modulus is undefined. 
+ The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. In these cases + modulo is undefined. The `overflow` option governs behavior when integer overflow occurs. If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`. impls: From debc8f08f3e3b016a36d0d53714c2cf044659d1b Mon Sep 17 00:00:00 2001 From: Jordan Vieler Date: Thu, 14 Dec 2023 08:41:05 -0600 Subject: [PATCH 03/13] fix: renamed quotient to division_type for clarity. removed CEILING, EUCLIDIAN, and ROUND --- extensions/functions_arithmetic.yaml | 31 ++++++++++++---------------- 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 3392afa38..6bdffcb60 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -299,17 +299,13 @@ scalar_functions: (3) abs(r) < abs(y) where q is the quotient. - The `quotient` option determines the mathematical definition of quotient to use in the above definition of + The `division_type` option determines the mathematical definition of quotient to use in the above definition of division. - When `quotient`=TRUNCATE, q = trunc(x/y). - When `quotient`=FLOOR, q = floor(x/y). - When `quotient`=CEILING, q = ceil(x/y). - When `quotient`=ROUND, q = round(x/y), where the round function is rounding half to even. - When `quotient`=EUCLIDIAN, q = sign(y) * floor(x/abs(y)) + When `division_type`=TRUNCATE, q = trunc(x/y). + When `division_type`=FLOOR, q = floor(x/y). - In the cases of TRUNCATE and FLOOR division: remainder r = x - round_func(x/y) + In the cases of TRUNCATE and FLOOR division: remainder r = x - round_func(x/y) The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. In these cases modulo is undefined. 
@@ -322,12 +318,11 @@ scalar_functions: - name: y value: i8 options: - quotient: - values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + division_type: + values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ NAN, ERROR ] + on_domain_error: values: [ NAN, ERROR ] return: i8 - args: - name: x @@ -335,8 +330,8 @@ scalar_functions: - name: y value: i16 options: - quotient: - values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + division_type: + values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: @@ -348,8 +343,8 @@ scalar_functions: - name: y value: i32 options: - quotient: - values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + division_type: + values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: @@ -361,8 +356,8 @@ scalar_functions: - name: y value: i64 options: - quotient: - values: [ TRUNCATE, FLOOR, CEILING, ROUND, EUCLIDEAN ] + division_type: + values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: From b1425559d6ed84bb28c8d29bb1e74911d6877749 Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:23:43 -0600 Subject: [PATCH 04/13] Update extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 6bdffcb60..aef3cb497 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -291,7 +291,7 @@ scalar_functions: In mathematics, many conventions for the modulo operation exists. In computing the result of a modulo operation depends on the software implementation and underlying hardware. Substrait is a format for describing compute operations on structured data and designed for interoperability. 
Therefore the user is responsible for determining - a definition of division as defined by the quotient (y). + a definition of division as defined by the quotient (q). The following basic conditions of division are satisfied: (1) q ∈ ℤ (the quotient is an integer) From 4e40ebfe384d38d2df7a22d38342dae06c57e43b Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:23:50 -0600 Subject: [PATCH 05/13] Update extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index aef3cb497..5468a12e7 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -334,8 +334,6 @@ scalar_functions: values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] - on_domain_error: - values: [ NAN, ERROR ] return: i16 - args: - name: x From d4c91f53ab464c0cef5a586f120df02cb1593fbc Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:24:02 -0600 Subject: [PATCH 06/13] Update extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 5468a12e7..c0a46dd1e 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -346,7 +346,7 @@ scalar_functions: overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: - values: [ NAN, ERROR ] + values: [ NULL, ERROR ] return: i32 - args: - name: x From 20ac54607cfbf84a53b699c955ca9e7360aa62a7 Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:24:09 -0600 Subject: [PATCH 07/13] Update 
extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index c0a46dd1e..34fd48787 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -359,7 +359,7 @@ scalar_functions: overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: - values: [ NAN, ERROR ] + values: [ NULL, ERROR ] return: i64 - name: "power" From a8c8685817e3898eb4e9afb5b543c8e70ca16554 Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 17:28:04 -0600 Subject: [PATCH 08/13] Update extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 34fd48787..c7d715279 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -322,7 +322,7 @@ scalar_functions: values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] - on_domain_error: values: [ NAN, ERROR ] + on_domain_error: values: [ NULL, ERROR ] return: i8 - args: - name: x From a447a89d781ccce532c1d9600fb32f9d784c2d71 Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Thu, 14 Dec 2023 23:20:05 -0600 Subject: [PATCH 09/13] Update extensions/functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index c7d715279..04d71b129 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -288,7 +288,7 @@ scalar_functions: description: > Calculate the 
remainder (r) when dividing dividend (x) by divisor (y). - In mathematics, many conventions for the modulo operation exists. In computing the result of a modulo operation + In mathematics, many conventions for the modulo operation exists. The result of a modulo operation depends on the software implementation and underlying hardware. Substrait is a format for describing compute operations on structured data and designed for interoperability. Therefore the user is responsible for determining a definition of division as defined by the quotient (q). From 8dc08fe52bb0b61b67560373dcfcb022558b4ae0 Mon Sep 17 00:00:00 2001 From: Jordan Vieler Date: Mon, 18 Dec 2023 13:53:50 -0600 Subject: [PATCH 10/13] fix: corrected missing return --- extensions/functions_arithmetic.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 04d71b129..f4e7f699a 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -322,7 +322,8 @@ scalar_functions: values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] - on_domain_error: values: [ NULL, ERROR ] + on_domain_error: + values: [ NULL, ERROR ] return: i8 - args: - name: x From 327510efd60212139c4c81349b4231d7811551d6 Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Mon, 18 Dec 2023 21:33:26 -0600 Subject: [PATCH 11/13] Update functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index f4e7f699a..539f2c28b 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -323,7 +323,7 @@ scalar_functions: overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: - values: [ NULL, ERROR ] + values: [ "NULL", ERROR ] return: i8 
- args: - name: x From 63779775a1d4b999fafd4e219e2d8cb006b8bb4f Mon Sep 17 00:00:00 2001 From: Jordan Vieler <97200595+jordanvieler@users.noreply.github.com> Date: Mon, 18 Dec 2023 21:33:29 -0600 Subject: [PATCH 12/13] Update functions_arithmetic.yaml Co-authored-by: Weston Pace --- extensions/functions_arithmetic.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 539f2c28b..3d1e33a0c 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -347,7 +347,7 @@ scalar_functions: overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: - values: [ NULL, ERROR ] + values: [ "NULL", ERROR ] return: i32 - args: - name: x From 2484fc8c8e139254b4776ea099df42dec171c293 Mon Sep 17 00:00:00 2001 From: Jordan Vieler Date: Mon, 18 Dec 2023 17:10:59 -0600 Subject: [PATCH 13/13] reverted modulo to modulus, fixed NULL quoting, added on_domain_error to i16 --- extensions/functions_arithmetic.yaml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/extensions/functions_arithmetic.yaml b/extensions/functions_arithmetic.yaml index 3d1e33a0c..fbfc1f7b2 100644 --- a/extensions/functions_arithmetic.yaml +++ b/extensions/functions_arithmetic.yaml @@ -284,11 +284,11 @@ scalar_functions: value: fp64 return: fp64 - - name: "modulo" + name: "modulus" description: > Calculate the remainder (r) when dividing dividend (x) by divisor (y). - In mathematics, many conventions for the modulo operation exists. The result of a modulo operation + In mathematics, many conventions for the modulus (mod) operation exists. The result of a mod operation depends on the software implementation and underlying hardware. Substrait is a format for describing compute operations on structured data and designed for interoperability. Therefore the user is responsible for determining a definition of division as defined by the quotient (q). 
@@ -308,7 +308,7 @@ scalar_functions: In the cases of TRUNCATE and FLOOR division: remainder r = x - round_func(x/y) The `on_domain_error` option governs behavior in cases where y is 0, y is +/-inf, or x is +/-inf. In these cases - modulo is undefined. + the mod is undefined. The `overflow` option governs behavior when integer overflow occurs. If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`. impls: @@ -335,6 +335,8 @@ scalar_functions: values: [ TRUNCATE, FLOOR ] overflow: values: [ SILENT, SATURATE, ERROR ] + on_domain_error: + values: [ "NULL", ERROR ] return: i16 - args: - name: x @@ -360,7 +362,7 @@ scalar_functions: overflow: values: [ SILENT, SATURATE, ERROR ] on_domain_error: - values: [ NULL, ERROR ] + values: [ "NULL", ERROR ] return: i64 - name: "power"