Skip to content

Commit

Permalink
fix: make parse_date able to parse partial dates (#8330)
Browse files Browse the repository at this point in the history
* fix: make parse_date able to parse partial dates

* use SimpleDateFormat

* use FastDateFormat

* errors
  • Loading branch information
Zara Lim authored Nov 9, 2021
1 parent d7ef855 commit 6a82026
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 29 deletions.
2 changes: 1 addition & 1 deletion ksqldb-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.3.1</version>
<version>3.5</version>
</dependency>

<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,33 +25,29 @@
import io.confluent.ksql.function.udf.UdfParameter;
import io.confluent.ksql.util.KsqlConstants;
import java.sql.Date;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.util.Arrays;
import java.util.Optional;
import java.text.ParseException;
import java.util.TimeZone;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang3.time.FastDateFormat;

@UdfDescription(
name = "parse_date",
category = FunctionCategory.DATE_TIME,
author = KsqlConstants.CONFLUENT_AUTHOR,
description = "Converts a string representation of a date in the given format"
+ " into a DATE value. The format pattern should be in the format expected by"
+ " java.time.format.DateTimeFormatter"
+ " java.text.SimpleDateFormat"
)
public class ParseDate {

private final LoadingCache<String, DateTimeFormatter> formatters =
private static final long MILLIS_IN_DAY = TimeUnit.DAYS.toMillis(1);

private final LoadingCache<String, FastDateFormat> formatters =
CacheBuilder.newBuilder()
.maximumSize(1000)
.build(CacheLoader.from(pattern -> new DateTimeFormatterBuilder()
.parseCaseInsensitive()
.appendPattern(pattern)
.toFormatter()));
.build(CacheLoader.from(pattern ->
FastDateFormat.getInstance(pattern, TimeZone.getTimeZone("GMT"))));

@Udf(description = "Converts a string representation of a date in the given format"
+ " into a DATE value.")
Expand All @@ -60,22 +56,14 @@ public Date parseDate(
description = "The string representation of a date.") final String formattedDate,
@UdfParameter(
description = "The format pattern should be in the format expected by"
+ " java.time.format.DateTimeFormatter.") final String formatPattern) {
+ " java.text.SimpleDateFormat.") final String formatPattern) {
try {

final TemporalAccessor ta = formatters.get(formatPattern).parse(formattedDate);
final Optional<ChronoField> timeField = Arrays.stream(ChronoField.values())
.filter(field -> field.isTimeBased())
.filter(field -> ta.isSupported(field))
.findFirst();

if (timeField.isPresent()) {
final long time = formatters.get(formatPattern).parse(formattedDate).getTime();
if (time % MILLIS_IN_DAY != 0) {
throw new KsqlFunctionException("Date format contains time field.");
}

return new Date(
TimeUnit.DAYS.toMillis(LocalDate.from(ta).toEpochDay()));
} catch (final ExecutionException | RuntimeException e) {
return new Date(time);
} catch (final ExecutionException | RuntimeException | ParseException e) {
throw new KsqlFunctionException("Failed to parse date '" + formattedDate
+ "' with formatter '" + formatPattern
+ "': " + e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,24 @@ public void shouldConvertStringToDate() {
assertThat(result.getTime(), is(1638316800000L));
}

/**
 * Checks that a year-month string with no day-of-month component is accepted by
 * {@code parseDate} when paired with the {@code yyyy-MM} pattern.
 */
@Test
public void shouldConvertYearMonthToDate() {
// When:
final Date result = udf.parseDate("2021-12", "yyyy-MM");

// Then:
// 1638316800000 ms = 18962 days after the epoch, i.e. 2021-12-01T00:00:00Z:
// the result is expected to land on the first day of the given month.
assertThat(result.getTime(), is(1638316800000L));
}

/**
 * Checks that a bare year string is accepted by {@code parseDate} when paired
 * with the {@code yyyy} pattern.
 */
@Test
public void shouldConvertYearToDate() {
// When:
final Date result = udf.parseDate("2022", "yyyy");

// Then:
// 1640995200000 ms = 18993 days after the epoch, i.e. 2022-01-01T00:00:00Z:
// the result is expected to land on the first day of the given year.
assertThat(result.getTime(), is(1640995200000L));
}

@Test
public void shouldConvertCaseInsensitiveStringToDate() {
// When:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,15 @@
{"topic": "test_topic", "key": "1", "value": "1,zero,11/05/2019,dd/MM/yyyy"},
{"topic": "test_topic", "key": "2", "value": "2,zero,01-Jan-2022,dd-MMM-yyyy"},
{"topic": "test_topic", "key": "3", "value": "3,yyy,01-01-1970,dd-MM-yyyy"},
{"topic": "test_topic", "key": "4", "value": "4,yyy,01-JAN-2022,dd-MMM-yyyy"}
{"topic": "test_topic", "key": "4", "value": "4,yyy,01-JAN-2022,dd-MMM-yyyy"},
{"topic": "test_topic", "key": "4", "value": "4,yyy,JAN-2022,MMM-yyyy"}
],
"outputs": [
{"topic": "TS", "key": "0", "value": "0,17662"},
{"topic": "TS", "key": "1", "value": "1,18027"},
{"topic": "TS", "key": "2", "value": "2,18993"},
{"topic": "TS", "key": "3", "value": "3,0"},
{"topic": "TS", "key": "4", "value": "4,18993"},
{"topic": "TS", "key": "4", "value": "4,18993"}
]
}
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@
<commons-text.version>1.8</commons-text.version>
<csv.version>1.4</csv.version>
<commons.compress.version>1.21</commons.compress.version>
<lang3.version>3.3.1</lang3.version>
<lang3.version>3.5</lang3.version>
<guava.version>30.1.1-jre</guava.version>
<protobuf.version>3.17.0</protobuf.version>
<retrying.version>2.0.0</retrying.version>
Expand Down

0 comments on commit 6a82026

Please sign in to comment.