Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add week_of_year Spark function #5941

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions velox/docs/functions/spark/datetime.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,13 @@ These functions support TIMESTAMP and DATE input types.
Returns null if ``string`` does not match ``format`` or if ``format``
is invalid.

.. spark:function:: week_of_year(x) -> integer

Returns the `ISO-Week`_ of the year from ``x``. The value ranges from ``1`` to ``53``.
A week is considered to start on a Monday, and week 1 is the first week with more than 3 days in the new year.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you double check that this matches Spark's implementation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some extra unit tests copied from Spark's unit tests and validated the results.
According to https://github.com/apache/spark/blob/v3.2.2/sql/core/src/main/scala/org/apache/spark/sql/functions.scala#L3240-L3241, Spark's implementation also follows ISO 8601.


.. _ISO-Week: https://en.wikipedia.org/wiki/ISO_week_date

.. spark:function:: year(x) -> integer
Returns the year from ``x``.
54 changes: 54 additions & 0 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,60 @@ struct YearFunction : public InitSessionTimezone<T> {
}
};

/// Implements week_of_year(x): the ISO 8601 week number (1..53) of a TIMESTAMP
/// or DATE value. ISO weeks start on Monday, and week 1 of a year is the week
/// that contains that year's first Thursday.
template <typename T>
struct WeekFunction : public InitSessionTimezone<T> {
  VELOX_DEFINE_FUNCTION_TYPES(T);

  /// Computes the ISO week number from a broken-down time.
  ///
  /// Only tm_yday (0-based day of year), tm_wday (0 = Sunday) and tm_year
  /// (years since 1900) are read.
  ///
  /// @param time Broken-down calendar time of the day in question.
  /// @return ISO week number in the range [1, 53].
  FOLLY_ALWAYS_INLINE int32_t getWeek(const std::tm& time) {
    // The computation of ISO week from date follows the algorithm here:
    // https://en.wikipedia.org/wiki/ISO_week_date
    // 1-based ordinal day of the year.
    const int32_t dayOfYear = time.tm_yday + 1;
    // ISO weekday: Monday = 1 ... Sunday = 7 (tm_wday reports Sunday as 0).
    const int32_t isoWeekday = time.tm_wday ? time.tm_wday : kDaysInWeek;

    // The numerator is always >= 4, so integer division already floors.
    int32_t week = (10 + dayOfYear - isoWeekday) / kDaysInWeek;

    if (week == 0) {
      // The date belongs to the last ISO week of the previous year, which is
      // either week 52 or week 53.

      // Distance in days between the first day of the current year and the
      // Monday of the current week (zero or negative here).
      const auto mondayOfWeek = dayOfYear - (isoWeekday - 1);
      // Day of January on which the first Monday of the current year falls.
      const auto firstMondayOfYear =
          1 + (mondayOfWeek + kDaysInWeek - 1) % kDaysInWeek;
      const bool previousYearIsLeap =
          util::isLeapYear(time.tm_year + 1900 - 1);

      // The previous year has 53 weeks iff it ended on a Thursday (first
      // Monday is January 4th), or it is a leap year that ended on a Friday
      // (first Monday is January 3rd).
      if (firstMondayOfYear == 4 ||
          (previousYearIsLeap && firstMondayOfYear == 3)) {
        week = 53;
      } else {
        week = 52;
      }
    } else if (week == 53) {
      // Distance in days between the first day of the current year and the
      // Monday of the current week.
      const auto mondayOfWeek = dayOfYear - (isoWeekday - 1);
      const auto daysInYear = util::isLeapYear(time.tm_year + 1900) ? 366 : 365;
      // If the Thursday of this week (Monday + 3) falls in the next year, the
      // week is week 1 of the next year rather than week 53 of this one.
      if (daysInYear - mondayOfWeek < 3) {
        week = 1;
      }
    }

    return week;
  }

  /// TIMESTAMP overload. The timestamp is converted with getDateTime() using
  /// this->timeZone_ (presumably the session time zone set up by
  /// InitSessionTimezone -- confirm against that base class).
  FOLLY_ALWAYS_INLINE void call(
      int32_t& result,
      const arg_type<Timestamp>& timestamp) {
    result = getWeek(getDateTime(timestamp, this->timeZone_));
  }

  /// DATE overload. No time zone is passed to getDateTime().
  FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type<Date>& date) {
    result = getWeek(getDateTime(date));
  }
};

template <typename T>
struct UnixTimestampFunction {
// unix_timestamp();
Expand Down
2 changes: 2 additions & 0 deletions velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ void registerFunctions(const std::string& prefix) {
// Register date functions.
registerFunction<YearFunction, int32_t, Timestamp>({prefix + "year"});
registerFunction<YearFunction, int32_t, Date>({prefix + "year"});
registerFunction<WeekFunction, int32_t, Timestamp>({prefix + "week_of_year"});
registerFunction<WeekFunction, int32_t, Date>({prefix + "week_of_year"});

registerFunction<UnixTimestampFunction, int64_t>({prefix + "unix_timestamp"});

Expand Down
27 changes: 27 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ class DateTimeFunctionsTest : public SparkFunctionBaseTest {
{core::QueryConfig::kAdjustTimestampToTimezone, "true"},
});
}

// Test helper: converts a date string such as "1920-01-01" into the integer
// value produced by DATE()->toDays() (presumably days since the Unix epoch --
// confirm against the DATE type).
int32_t parseDate(const std::string& dateStr) {
  const int32_t days = DATE()->toDays(dateStr);
  return days;
}
};

TEST_F(DateTimeFunctionsTest, year) {
Expand Down Expand Up @@ -66,6 +70,29 @@ TEST_F(DateTimeFunctionsTest, yearDate) {
EXPECT_EQ(1920, year(DATE()->toDays("1920-01-01")));
}

// Checks week_of_year on DATE inputs, including dates around year boundaries
// where the ISO week belongs to the previous or the next year.
TEST_F(DateTimeFunctionsTest, weekOfYear) {
  // Evaluates week_of_year(c0) for a single DATE parsed from `dateString`.
  const auto weekOfYear = [&](const char* dateString) {
    const std::optional<int32_t> days = parseDate(dateString);
    const auto week =
        evaluateOnce<int32_t, int32_t>("week_of_year(c0)", {days}, {DATE()});
    return week.value();
  };

  struct TestCase {
    const char* date;
    int32_t expectedWeek;
  };

  const TestCase testCases[] = {
      {"1919-12-31", 1},
      {"1920-01-01", 1},
      {"1920-01-04", 1},
      {"1920-01-05", 2},
      {"1960-01-01", 53},
      {"1960-01-03", 53},
      {"1960-01-04", 1},
      {"1969-12-31", 1},
      {"1970-01-01", 1},
      {"0001-01-01", 1},
      {"9999-12-31", 52},
      {"2008-02-20", 8},
      {"2015-04-08", 15},
      {"2013-04-08", 15},
  };

  for (const auto& testCase : testCases) {
    // Stream the input date so a failure identifies the offending row.
    EXPECT_EQ(testCase.expectedWeek, weekOfYear(testCase.date))
        << testCase.date;
  }
}

TEST_F(DateTimeFunctionsTest, unixTimestamp) {
const auto unixTimestamp = [&](std::optional<StringView> dateStr) {
return evaluateOnce<int64_t>("unix_timestamp(c0)", dateStr);
Expand Down