From 8b08e9aaa1dafe5b7505954dfb3a15a4f0176128 Mon Sep 17 00:00:00 2001 From: Oliver Tan Date: Sat, 14 Nov 2020 18:56:29 +1100 Subject: [PATCH] sql: normalize age and timestamptz intervals like postgres Release note (sql change, bug fix): Previously, `timestamp/timestamptz - timestamp/timestamptz` operators would normalize the interval into months, days, H:M:S (in older versions, this may be just H:M:S). This could result in an incorrect result: ``` > select '2020-01-01'::timestamptz - '2018-01-01'; ?column? ------------------- 2 years 10 days ``` This has now been fixed such that it is only normalized into days/H:M:S. This is now more postgres compatible. Release note (sql change, bug fix): Previously, the `age` builtin would incorrectly normalize months and days based on 30 days a month (in older versions this may be just H:M:S). This can give an incorrect result, e.g. ``` > select age('2020-01-01'::timestamptz, '2018-01-01'); ?column? ------------------- 2 years 10 days ``` This is not the most accurate it could be as `age` can use the given timestamptz arguments to be more accurate. This is now more postgres compatible. --- docs/generated/sql/functions.md | 8 +- .../logictest/testdata/logic_test/datetime | 7 +- .../logictest/testdata/logic_test/timestamp | 34 +++++- .../opt/norm/testdata/rules/fold_constants | 2 +- pkg/sql/sem/builtins/builtins.go | 24 +++- pkg/sql/sem/tree/eval.go | 23 +--- pkg/util/duration/duration.go | 113 ++++++++++++++++++ 7 files changed, 182 insertions(+), 29 deletions(-) diff --git a/docs/generated/sql/functions.md b/docs/generated/sql/functions.md index 6e0a590a64e9..5a253c23fafc 100644 --- a/docs/generated/sql/functions.md +++ b/docs/generated/sql/functions.md @@ -284,9 +284,13 @@ - - diff --git a/pkg/sql/logictest/testdata/logic_test/datetime b/pkg/sql/logictest/testdata/logic_test/datetime index f97442cd5d78..e1fd338f40c7 100644 --- a/pkg/sql/logictest/testdata/logic_test/datetime +++ b/pkg/sql/logictest/testdata/logic_test/datetime @@ -159,13 +159,18 @@ SELECT '5874897-12-31'::date - '4714-11-24 BC'::date query T SELECT age('2001-04-10 22:06:45', '1957-06-13') ---- -384190:06:45 +43 years 9 mons 27 days 22:06:45 query B SELECT age('1957-06-13') - age(now(), '1957-06-13') = interval '0s' ---- true +query T +select age('2017-12-10'::timestamptz, '2017-12-01'::timestamptz) +---- +9 days + query B SELECT now() - timestamp '2015-06-13' > interval '100h' ---- diff --git a/pkg/sql/logictest/testdata/logic_test/timestamp b/pkg/sql/logictest/testdata/logic_test/timestamp index 12d747996230..9024428a5bfa 100644 --- a/pkg/sql/logictest/testdata/logic_test/timestamp +++ b/pkg/sql/logictest/testdata/logic_test/timestamp @@ -402,8 +402,8 @@ SELECT FROM example ORDER BY a ---- -2010-11-07 22:59:00 -0600 CST 2010-11-07 23:59:00 -0600 CST 2010-12-06 23:59:00 -0600 CST 2010-11-05 23:59:00 -0500 CDT 2010-11-05 23:59:00 -0500 CDT 2010-10-06 23:59:00 -0500 CDT 00:00:00 -25:00:00 2010-11-06 23:59:00-05:00 -2010-11-08 23:59:00 -0600 CST 2010-11-08 23:59:00 -0600 CST 2010-12-07 23:59:00 -0600 CST 2010-11-07 00:59:00 -0500 CDT 2010-11-06 23:59:00 -0500 CDT 2010-10-07 23:59:00 -0500 CDT 25:00:00 00:00:00 2010-11-07 23:59:00-06:00 +2010-11-07 22:59:00 -0600 CST 2010-11-07 23:59:00 -0600 CST 2010-12-06 23:59:00 -0600 CST 2010-11-05 23:59:00 -0500 CDT 2010-11-05 23:59:00 -0500 CDT 2010-10-06 23:59:00 -0500 CDT 00:00:00 -1 days -01:00:00 2010-11-06 23:59:00-05:00 +2010-11-08 23:59:00 -0600 CST 2010-11-08 23:59:00 -0600 CST 2010-12-07 23:59:00 -0600 CST 2010-11-07 00:59:00 -0500 CDT 2010-11-06 23:59:00 -0500 CDT 2010-10-07 23:59:00 -0500 CDT 1 day 01:00:00 00:00:00 2010-11-07 23:59:00-06:00 statement ok DROP TABLE example @@ -422,3 +422,33 @@ query R set time zone 'UTC'; select extract(epoch from TIMESTAMP WITH TIME ZONE '2010-11-06 23:59:00-05:00') ---- 1.28910594e+09 + +query TTTTTT +SET TIME ZONE 'Europe/Berlin'; SELECT + age(a::timestamptz, b::timestamptz), + age(b::timestamptz, a::timestamptz), + a::timestamptz - b::timestamptz, + b::timestamptz - a::timestamptz, + a::timestamp - b::timestamp, + b::timestamp - a::timestamp +FROM (VALUES + ('2020-05-06 11:12:13', '2015-06-07 13:14:15'), + ('2020-05-06 15:00:00', '2020-04-03 16:00:00'), + ('2020-02-29 00:02:05', '2019-02-28 18:19:01'), + ('2020-02-29 00:02:05', '2020-01-28 18:19:01'), + ('2020-02-29 00:02:05', '2020-03-28 18:19:01'), + ('2021-02-27 00:02:05', '2019-02-28 18:19:01'), + ('2021-02-27 00:02:05', '2021-01-28 18:19:01'), + ('2021-02-27 00:02:05', '2021-03-28 18:19:01'), + ('2020-02-28 00:02:05', '2020-02-28 18:19:01') +) regression_age_tests(a, b) +---- +4 years 10 mons 28 days 21:57:58 -4 years -10 mons -28 days -21:57:58 1794 days 21:57:58 -1794 days -21:57:58 1794 days 21:57:58 -1794 days -21:57:58 +1 mon 2 days 23:00:00 -1 mons -2 days -23:00:00 32 days 23:00:00 -32 days -23:00:00 32 days 23:00:00 -32 days -23:00:00 +1 year 05:43:04 -1 years -05:43:04 365 days 05:43:04 -365 days -05:43:04 365 days 05:43:04 -365 days -05:43:04 +1 mon 05:43:04 -1 mons -05:43:04 31 days 05:43:04 -31 days -05:43:04 31 days 05:43:04 -31 days -05:43:04 +-28 days -18:16:56 28 days 18:16:56 -28 days -18:16:56 28 days 18:16:56 -28 days -18:16:56 28 days 18:16:56 +1 year 11 mons 26 days 05:43:04 -1 years -11 mons -26 days -05:43:04 729 days 05:43:04 -729 days -05:43:04 729 days 05:43:04 -729 days -05:43:04 +29 days 05:43:04 -29 days -05:43:04 29 days 05:43:04 -29 days -05:43:04 29 days 05:43:04 -29 days -05:43:04 +-1 mons -1 days -17:16:56 1 mon 1 day 17:16:56 -29 days -17:16:56 29 days 17:16:56 -29 days -18:16:56 29 days 18:16:56 +-18:16:56 18:16:56 -18:16:56 18:16:56 -18:16:56 18:16:56 diff --git a/pkg/sql/opt/norm/testdata/rules/fold_constants b/pkg/sql/opt/norm/testdata/rules/fold_constants index 469db12f2a2a..5df6d74f713d 100644 --- a/pkg/sql/opt/norm/testdata/rules/fold_constants +++ b/pkg/sql/opt/norm/testdata/rules/fold_constants @@ -366,7 +366,7 @@ values ├── cardinality: [1 - 1] ├── key: () ├── fd: ()-->(1) - └── ('-24:00:00',) + └── ('-1 days',) # Fold constant. norm expect=FoldBinary diff --git a/pkg/sql/sem/builtins/builtins.go b/pkg/sql/sem/builtins/builtins.go index 4ae90fdce378..08ce5e5ff106 100644 --- a/pkg/sql/sem/builtins/builtins.go +++ b/pkg/sql/sem/builtins/builtins.go @@ -1775,17 +1775,33 @@ CockroachDB supports the following flags: Types: tree.ArgTypes{{"val", types.TimestampTZ}}, ReturnType: tree.FixedReturnType(types.Interval), Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { - return tree.TimestampDifference(ctx, ctx.GetTxnTimestamp(time.Microsecond), args[0]) + return &tree.DInterval{ + Duration: duration.Age( + ctx.GetTxnTimestamp(time.Microsecond).Time, + args[0].(*tree.DTimestampTZ).Time, + ), + }, nil }, - Info: "Calculates the interval between `val` and the current time.", + Info: "Calculates the interval between `val` and the current time, normalized into years, months and days." + ` + + Note this may not be an accurate time span since years and months are normalized from days, and years and months are out of context. + To avoid normalizing days into months and years, use ` + "`now() - timestamptz`" + `.`, }, tree.Overload{ Types: tree.ArgTypes{{"end", types.TimestampTZ}, {"begin", types.TimestampTZ}}, ReturnType: tree.FixedReturnType(types.Interval), Fn: func(ctx *tree.EvalContext, args tree.Datums) (tree.Datum, error) { - return tree.TimestampDifference(ctx, args[0], args[1]) + return &tree.DInterval{ + Duration: duration.Age( + args[0].(*tree.DTimestampTZ).Time, + args[1].(*tree.DTimestampTZ).Time, + ), + }, nil }, - Info: "Calculates the interval between `begin` and `end`.", + Info: "Calculates the interval between `begin` and `end`, normalized into years, months and days." + ` + + Note this may not be an accurate time span since years and months are normalized from days, and years and months are out of context. + To avoid normalizing days into months and years, use the timestamptz subtraction operator.`, }, ), diff --git a/pkg/sql/sem/tree/eval.go b/pkg/sql/sem/tree/eval.go index bf30848dae24..caf3c8c687dd 100644 --- a/pkg/sql/sem/tree/eval.go +++ b/pkg/sql/sem/tree/eval.go @@ -878,7 +878,7 @@ var BinOps = map[BinaryOperator]binOpOverload{ ReturnType: types.Interval, Fn: func(_ *EvalContext, left Datum, right Datum) (Datum, error) { nanos := left.(*DTimestamp).Sub(right.(*DTimestamp).Time).Nanoseconds() - return &DInterval{Duration: duration.MakeDuration(nanos, 0, 0)}, nil + return &DInterval{Duration: duration.MakeDurationJustifyHours(nanos, 0, 0)}, nil }, }, &BinOp{ @@ -887,7 +887,7 @@ var BinOps = map[BinaryOperator]binOpOverload{ ReturnType: types.Interval, Fn: func(_ *EvalContext, left Datum, right Datum) (Datum, error) { nanos := left.(*DTimestampTZ).Sub(right.(*DTimestampTZ).Time).Nanoseconds() - return &DInterval{Duration: duration.MakeDuration(nanos, 0, 0)}, nil + return &DInterval{Duration: duration.MakeDurationJustifyHours(nanos, 0, 0)}, nil }, }, &BinOp{ @@ -898,7 +898,7 @@ var BinOps = map[BinaryOperator]binOpOverload{ // These two quantities aren't directly comparable. Convert the // TimestampTZ to a timestamp first. nanos := left.(*DTimestamp).Sub(right.(*DTimestampTZ).stripTimeZone(ctx).Time).Nanoseconds() - return &DInterval{Duration: duration.MakeDuration(nanos, 0, 0)}, nil + return &DInterval{Duration: duration.MakeDurationJustifyHours(nanos, 0, 0)}, nil }, }, &BinOp{ @@ -909,7 +909,7 @@ var BinOps = map[BinaryOperator]binOpOverload{ // These two quantities aren't directly comparable. Convert the // TimestampTZ to a timestamp first. nanos := left.(*DTimestampTZ).stripTimeZone(ctx).Sub(right.(*DTimestamp).Time).Nanoseconds() - return &DInterval{Duration: duration.MakeDuration(nanos, 0, 0)}, nil + return &DInterval{Duration: duration.MakeDurationJustifyHours(nanos, 0, 0)}, nil }, }, &BinOp{ @@ -1703,21 +1703,6 @@ var BinOps = map[BinaryOperator]binOpOverload{ }, } -// timestampMinusBinOp is the implementation of the subtraction -// between types.TimestampTZ operands. -var timestampMinusBinOp *BinOp - -// TimestampDifference computes the interval difference between two -// TimestampTZ datums. The result is a DInterval. The caller must -// ensure that the arguments are of the proper Datum type. -func TimestampDifference(ctx *EvalContext, start, end Datum) (Datum, error) { - return timestampMinusBinOp.Fn(ctx, start, end) -} - -func init() { - timestampMinusBinOp, _ = BinOps[Minus].lookupImpl(types.TimestampTZ, types.TimestampTZ) -} - // CmpOp is a comparison operator. type CmpOp struct { LeftType *types.T diff --git a/pkg/util/duration/duration.go b/pkg/util/duration/duration.go index 2f09d08abd50..1a9a18cff775 100644 --- a/pkg/util/duration/duration.go +++ b/pkg/util/duration/duration.go @@ -35,6 +35,10 @@ const ( SecsPerHour = 3600 // SecsPerDay is the amount of seconds in a day. SecsPerDay = 86400 + // MinsPerHour is the amount of minutes in an hour. + MinsPerHour = 60 + // HoursPerDay is the number of hours in a day. + HoursPerDay = 24 // DaysPerMonth is the assumed amount of days in a month. // is always evaluated to 30, as it is in postgres. DaysPerMonth = 30 @@ -102,6 +106,115 @@ func MakeDuration(nanos, days, months int64) Duration { } } +// MakeDurationJustifyHours returns a duration where hours are moved +// to days if the number of hours exceeds 24. +func MakeDurationJustifyHours(nanos, days, months int64) Duration { + const nanosPerDay = int64(HoursPerDay * time.Hour) + extraDays := nanos / nanosPerDay + days += extraDays + nanos -= extraDays * nanosPerDay + return Duration{ + Months: months, + Days: days, + nanos: rounded(nanos), + } +} + +// Age returns a Duration rounded to the nearest microsecond +// from the time difference of (lhs - rhs). +// +// Note that we cannot use time.Time's sub, as time.Duration does not give +// an accurate picture of day/month differences. +// +// This is lifted from Postgres' timestamptz_age. The following comment applies: +// Note that this does not result in an accurate absolute time span +// since year and month are out of context once the arithmetic +// is done. +func Age(lhs, rhs time.Time) Duration { + // Strictly compare only UTC time. + lhs = lhs.UTC() + rhs = rhs.UTC() + + years := int64(lhs.Year() - rhs.Year()) + months := int64(lhs.Month() - rhs.Month()) + days := int64(lhs.Day() - rhs.Day()) + hours := int64(lhs.Hour() - rhs.Hour()) + minutes := int64(lhs.Minute() - rhs.Minute()) + seconds := int64(lhs.Second() - rhs.Second()) + nanos := int64(lhs.Nanosecond() - rhs.Nanosecond()) + + flip := func() { + years = -years + months = -months + days = -days + hours = -hours + minutes = -minutes + seconds = -seconds + nanos = -nanos + } + + // Flip signs so we're always operating from a positive. + if rhs.After(lhs) { + flip() + } + + // For each field that is now negative, promote them to positive. + // We could probably use smarter math here, but to keep things simple and postgres-esque, + // we'll do the the same way postgres does. We do not expect these overflow values + // to be too large from the math above anyway. + for nanos < 0 { + nanos += nanosInSecond + seconds-- + } + for seconds < 0 { + seconds += SecsPerMinute + minutes-- + } + for minutes < 0 { + minutes += MinsPerHour + hours-- + } + for hours < 0 { + hours += HoursPerDay + days-- + } + for days < 0 { + // Get days in month preceding the current month of whichever is greater. + if rhs.After(lhs) { + days += daysInCurrentMonth(lhs) + } else { + days += daysInCurrentMonth(rhs) + } + months-- + } + for months < 0 { + months += MonthsPerYear + years-- + } + + // Revert the sign back. + if rhs.After(lhs) { + flip() + } + + return Duration{ + Months: years*MonthsPerYear + months, + Days: days, + nanos: rounded( + nanos + + int64(time.Second)*seconds + + int64(time.Minute)*minutes + + int64(time.Hour)*hours, + ), + } +} + +func daysInCurrentMonth(t time.Time) int64 { + // Take the first day of the month, add a month and subtract a day. + // This returns the last day of the month, which the number of days in the month. + return int64(time.Date(t.Year(), t.Month(), 1, 0, 0, 0, 0, time.UTC).AddDate(0, 1, -1).Day()) +} + // DecodeDuration returns a Duration without rounding nanos. func DecodeDuration(months, days, nanos int64) Duration { return Duration{
Function → ReturnsDescription
age(end: timestamptz, begin: timestamptz) → interval

Calculates the interval between begin and end.

+
age(end: timestamptz, begin: timestamptz) → interval

Calculates the interval between begin and end, normalized into years, months and days.

+
		Note this may not be an accurate time span since years and months are normalized from days, and years and months are out of context.
+		To avoid normalizing days into months and years, use the timestamptz subtraction operator.
age(val: timestamptz) → interval

Calculates the interval between val and the current time.

+
age(val: timestamptz) → interval

Calculates the interval between val and the current time, normalized into years, months and days.

+
		Note this may not be an accurate time span since years and months are normalized from days, and years and months are out of context.
+		To avoid normalizing days into months and years, use `now() - timestamptz`.
clock_timestamp() → timestamp

Returns the current system time on one of the cluster nodes.