Skip to content

Commit

Permalink
Add week_of_year Spark function (#5941)
Browse files Browse the repository at this point in the history
Summary:
Presto's `week_of_year` function differs from Spark's in return type: BIGINT vs. INTEGER.

Copy-paste Presto's implementation and change the return type.

Spark's implementation: https://github.com/apache/spark/blob/v3.2.2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala#L868

Pull Request resolved: #5941

Reviewed By: amitkdutta

Differential Revision: D48224232

Pulled By: mbasmanova

fbshipit-source-id: 19313b0294a8aa714b017deb4623ba1422084a35
  • Loading branch information
majian4work authored and facebook-github-bot committed Aug 10, 2023
1 parent 71047bf commit 15e1782
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 0 deletions.
7 changes: 7 additions & 0 deletions velox/docs/functions/spark/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ These functions support TIMESTAMP and DATE input types.
Returns null if ``string`` does not match ``format`` or if ``format``
is invalid.

.. spark:function:: week_of_year(x) -> integer

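Returns the `ISO-Week`_ of the year from ``x``. The value ranges from ``1`` to ``53``.
A week starts on a Monday, and week 1 is the first week that has more than 3 days
(i.e. at least 4 days) in the new year. As a result, the first days of January may
belong to week ``52`` or ``53`` of the previous year, and the last days of December
may belong to week ``1`` of the next year.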

.. _ISO-Week: https://en.wikipedia.org/wiki/ISO_week_date

.. spark:function:: year(x) -> integer
Returns the year from ``x``.
54 changes: 54 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,60 @@ struct YearFunction : public InitSessionTimezone<T> {
}
};

/// Spark-compatible week_of_year(x): returns the ISO-8601 week number
/// (1..53) of a DATE or TIMESTAMP value as a 32-bit integer.
template <typename T>
struct WeekFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Computes the ISO week number of a broken-down time.
  ///
  /// Only tm_yday (0-based day of the year), tm_wday (0 = Sunday) and tm_year
  /// (years since 1900) of 'time' are read.
  ///
  /// The computation follows the algorithm described in
  /// https://en.wikipedia.org/wiki/ISO_week_date
  ///
  /// @return Week number in [1, 53]. Days in early January may be reported as
  /// week 52/53 (last week of the previous ISO year) and days in late December
  /// may be reported as week 1 (first week of the next ISO year).
  FOLLY_ALWAYS_INLINE int32_t getWeek(const std::tm& time) {
    // 1-based ordinal day of the year.
    const auto dayOfYear = time.tm_yday + 1;
    // ISO weekday: Monday = 1, ..., Sunday = 7.
    const auto isoWeekday = time.tm_wday ? time.tm_wday : kDaysInWeek;

    // All operands are positive, so integer division already yields the
    // floor required by the formula; no floating point is needed.
    auto week =
        static_cast<int32_t>((10 + dayOfYear - isoWeekday) / kDaysInWeek);

    // Ordinal day (relative to Jan 1 == 1) of the Monday that starts the
    // current week. It is <= 0 when that Monday lies in the previous year.
    const auto mondayOfWeek = dayOfYear - (isoWeekday - 1);

    if (week == 0) {
      // The date belongs to the last ISO week of the previous year. This only
      // happens when Jan 1 falls on Friday, Saturday or Sunday, i.e. when the
      // first Monday of the current year is Jan 4, Jan 3 or Jan 2.
      const auto firstMondayOfYear =
          1 + (mondayOfWeek + kDaysInWeek - 1) % kDaysInWeek;

      // The previous year has 53 ISO weeks iff
      //  - it ended on a Thursday (Jan 1 is a Friday, first Monday is Jan 4),
      //  - or it is a leap year that ended on a Friday (Jan 1 is a Saturday,
      //    first Monday is Jan 3), which means it started on a Thursday.
      // When Jan 1 is a Sunday (first Monday is Jan 2) the previous year ended
      // on a Saturday and always has 52 weeks.
      const bool previousYearHas53Weeks = firstMondayOfYear == 4 ||
          (firstMondayOfYear == 3 &&
           util::isLeapYear(time.tm_year + 1900 - 1));

      week = previousYearHas53Weeks ? 53 : 52;
    } else if (week == 53) {
      // If fewer than 3 days of the current year follow the Monday of this
      // week, the week's Thursday falls into the next year, so the week is
      // week 1 of the next ISO year.
      const auto daysInYear = util::isLeapYear(time.tm_year + 1900) ? 366 : 365;
      if (daysInYear - mondayOfWeek < 3) {
        week = 1;
      }
    }

    return week;
  }

  /// TIMESTAMP overload: the timestamp is converted to a broken-down time via
  /// getDateTime() using this->timeZone_ before the week is computed.
  FOLLY_ALWAYS_INLINE void call(
      int32_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = getWeek(getDateTime(timestamp, this->timeZone_));
  }

  /// DATE overload: the date is converted to a broken-down time via
  /// getDateTime() before the week is computed.
  FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type<Date>& date) {
    result = getWeek(getDateTime(date));
  }
};

template <typename T>
struct UnixTimestampFunction {
// unix_timestamp();
Expand Down
2 changes: 2 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ void registerFunctions(const std::string& prefix) {
// Register date functions.
registerFunction<YearFunction, int32_t, Timestamp>({prefix + "year"});
registerFunction<YearFunction, int32_t, Date>({prefix + "year"});
registerFunction<WeekFunction, int32_t, Timestamp>({prefix + "week_of_year"});
registerFunction<WeekFunction, int32_t, Date>({prefix + "week_of_year"});

registerFunction<UnixTimestampFunction, int64_t>({prefix + "unix_timestamp"});

Expand Down
27 changes: 27 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest {
{core::QueryConfig::kAdjustTimestampToTimezone, "true"},
});
}

// Converts a date string such as "1920-01-01" into the value returned by
// DATE()->toDays() for that string.
int32_t parseDate(const std::string& dateStr) {
  const auto days = DATE()->toDays(dateStr);
  return days;
}
};

TEST_F(DateTimeFunctionsTest, year) {
Expand Down Expand Up @@ -66,6 +70,29 @@ TEST_F(DateTimeFunctionsTest, yearDate) {
EXPECT_EQ(1920, year(DATE()->toDays("1920-01-01")));
}

// Verifies week_of_year on DATE inputs, including dates around year
// boundaries where the ISO week belongs to the neighbouring year.
TEST_F(DateTimeFunctionsTest, weekOfYear) {
  // Evaluates week_of_year(c0) for a single DATE value parsed from 'text'.
  const auto weekOfYear = [this](const char* text) {
    const std::optional<int32_t> days = parseDate(text);
    const auto week = evaluateOnce<int32_t, int32_t>(
        "week_of_year(c0)", {days}, {DATE()});
    return week.value();
  };

  // Last day of 1919 already counts as week 1 of 1920.
  EXPECT_EQ(1, weekOfYear("1919-12-31"));
  EXPECT_EQ(1, weekOfYear("1920-01-01"));
  EXPECT_EQ(1, weekOfYear("1920-01-04"));
  EXPECT_EQ(2, weekOfYear("1920-01-05"));

  // First days of 1960 still count as week 53 of 1959.
  EXPECT_EQ(53, weekOfYear("1960-01-01"));
  EXPECT_EQ(53, weekOfYear("1960-01-03"));
  EXPECT_EQ(1, weekOfYear("1960-01-04"));

  // Around the Unix epoch.
  EXPECT_EQ(1, weekOfYear("1969-12-31"));
  EXPECT_EQ(1, weekOfYear("1970-01-01"));

  // Extremes of the four-digit year range.
  EXPECT_EQ(1, weekOfYear("0001-01-01"));
  EXPECT_EQ(52, weekOfYear("9999-12-31"));

  // Dates in the middle of a year.
  EXPECT_EQ(8, weekOfYear("2008-02-20"));
  EXPECT_EQ(15, weekOfYear("2015-04-08"));
  EXPECT_EQ(15, weekOfYear("2013-04-08"));
}

TEST_F(DateTimeFunctionsTest, unixTimestamp) {
const auto unixTimestamp = [&](std::optional<StringView> dateStr) {
return evaluateOnce<int64_t>("unix_timestamp(c0)", dateStr);
Expand Down

0 comments on commit 15e1782

Please sign in to comment.