-
Notifications
You must be signed in to change notification settings - Fork 24.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Parse composite patterns using ClassicFormat.parseObject #40100
Changes from all commits
97d0472
4c7e997
a6448d2
aa505e2
abb0bc6
5bcbb7b
44ae404
945a6ad
68b11f4
8fb210c
0f2fca3
22269ce
71c149e
ef01371
0337fe0
b295fb0
09c1285
123633c
f24673f
1126ed1
8fcb0b1
3019974
9ffdb89
f35d0de
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
|
||
import org.elasticsearch.common.Strings; | ||
|
||
import java.text.ParsePosition; | ||
import java.time.ZoneId; | ||
import java.time.format.DateTimeFormatter; | ||
import java.time.format.DateTimeFormatterBuilder; | ||
|
@@ -29,7 +30,10 @@ | |
import java.time.temporal.TemporalAccessor; | ||
import java.time.temporal.TemporalField; | ||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Locale; | ||
import java.util.Map; | ||
import java.util.Objects; | ||
|
@@ -39,6 +43,7 @@ class JavaDateFormatter implements DateFormatter { | |
|
||
// base fields which should be used for default parsing, when we round up for date math | ||
private static final Map<TemporalField, Long> ROUND_UP_BASE_FIELDS = new HashMap<>(6); | ||
|
||
{ | ||
ROUND_UP_BASE_FIELDS.put(ChronoField.MONTH_OF_YEAR, 1L); | ||
ROUND_UP_BASE_FIELDS.put(ChronoField.DAY_OF_MONTH, 1L); | ||
|
@@ -50,22 +55,15 @@ class JavaDateFormatter implements DateFormatter { | |
|
||
private final String format; | ||
private final DateTimeFormatter printer; | ||
private final DateTimeFormatter parser; | ||
private final List<DateTimeFormatter> parsers; | ||
private final DateTimeFormatter roundupParser; | ||
|
||
private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter roundupParser, DateTimeFormatter parser) { | ||
this.format = format; | ||
this.printer = printer; | ||
this.roundupParser = roundupParser; | ||
this.parser = parser; | ||
} | ||
|
||
JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeFormatter... parsers) { | ||
this(format, printer, builder -> ROUND_UP_BASE_FIELDS.forEach(builder::parseDefaulting), parsers); | ||
} | ||
|
||
JavaDateFormatter(String format, DateTimeFormatter printer, Consumer<DateTimeFormatterBuilder> roundupParserConsumer, | ||
DateTimeFormatter... parsers) { | ||
DateTimeFormatter... parsers) { | ||
if (printer == null) { | ||
throw new IllegalArgumentException("printer may not be null"); | ||
} | ||
|
@@ -79,26 +77,21 @@ private JavaDateFormatter(String format, DateTimeFormatter printer, DateTimeForm | |
} | ||
this.printer = printer; | ||
this.format = format; | ||
|
||
if (parsers.length == 0) { | ||
this.parser = printer; | ||
} else if (parsers.length == 1) { | ||
this.parser = parsers[0]; | ||
this.parsers = Collections.singletonList(printer); | ||
} else { | ||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); | ||
for (DateTimeFormatter parser : parsers) { | ||
builder.appendOptional(parser); | ||
} | ||
this.parser = builder.toFormatter(Locale.ROOT); | ||
this.parsers = Arrays.asList(parsers); | ||
} | ||
|
||
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder(); | ||
if (format.contains("||") == false) { | ||
builder.append(this.parser); | ||
builder.append(this.parsers.get(0)); | ||
} | ||
roundupParserConsumer.accept(builder); | ||
DateTimeFormatter roundupFormatter = builder.toFormatter(parser.getLocale()); | ||
DateTimeFormatter roundupFormatter = builder.toFormatter(locale()); | ||
if (printer.getZone() != null) { | ||
roundupFormatter = roundupFormatter.withZone(printer.getZone()); | ||
roundupFormatter = roundupFormatter.withZone(zone()); | ||
} | ||
this.roundupParser = roundupFormatter; | ||
} | ||
|
@@ -107,10 +100,6 @@ DateTimeFormatter getRoundupParser() { | |
return roundupParser; | ||
} | ||
|
||
DateTimeFormatter getParser() { | ||
return parser; | ||
} | ||
|
||
DateTimeFormatter getPrinter() { | ||
return printer; | ||
} | ||
|
@@ -122,30 +111,64 @@ public TemporalAccessor parse(String input) { | |
} | ||
|
||
try { | ||
return parser.parse(input); | ||
return doParse(input); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think a huge comment here would be warranted: what happens here, and why? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Absolutely, will add. |
||
} catch (DateTimeParseException e) { | ||
throw new IllegalArgumentException("failed to parse date field [" + input + "] with format [" + format + "]", e); | ||
} | ||
} | ||
|
||
/** | ||
* Attempt parsing the input without throwing exception. If multiple parsers are provided, | ||
* it will continue iterating if the previous parser failed. The pattern must fully match, meaning the whole input was used. | ||
* This also means that this method depends on <code>DateTimeFormatter.ClassicFormat.parseObject</code> | ||
* which does not throw exceptions when parsing failed. | ||
* | ||
* The approach with a collection of parsers was taken because java-time requires ordering on optional (composite) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Minor nit: maybe also mention that you don't want to catch any exceptions or have any exceptions thrown, for performance reasons? |
||
* patterns. Joda does not suffer from this. | ||
* https://bugs.openjdk.java.net/browse/JDK-8188771 | ||
* | ||
* @param input An arbitrary string resembling the string representation of a date or time | ||
* @return a TemporalAccessor if parsing was successful. | ||
* @throws DateTimeParseException when unable to parse with any parsers | ||
*/ | ||
private TemporalAccessor doParse(String input) { | ||
if (parsers.size() > 1) { | ||
for (DateTimeFormatter formatter : parsers) { | ||
ParsePosition pos = new ParsePosition(0); | ||
Object object = formatter.toFormat().parseObject(input, pos); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is no need to call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you mean keeping a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd consider this optimization optional for now. |
||
if (parsingSucceeded(object, input, pos) == true) { | ||
return (TemporalAccessor) object; | ||
} | ||
} | ||
throw new DateTimeParseException("Failed to parse with all enclosed parsers", input, 0); | ||
} | ||
return this.parsers.get(0).parse(input); | ||
} | ||
|
||
private boolean parsingSucceeded(Object object, String input, ParsePosition pos) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is only used once. it might be more readable to just have a boolean
in the other method? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it is used once, but my intention was to hide the implementation details and this expression in the other method |
||
return object != null && pos.getIndex() == input.length(); | ||
} | ||
|
||
@Override | ||
public DateFormatter withZone(ZoneId zoneId) { | ||
// shortcut to avoid creating new objects unnecessarily | ||
if (zoneId.equals(parser.getZone())) { | ||
if (zoneId.equals(zone())) { | ||
return this; | ||
} | ||
|
||
return new JavaDateFormatter(format, printer.withZone(zoneId), roundupParser.withZone(zoneId), parser.withZone(zoneId)); | ||
return new JavaDateFormatter(format, printer.withZone(zoneId), | ||
parsers.stream().map(p -> p.withZone(zoneId)).toArray(size -> new DateTimeFormatter[size])); | ||
} | ||
|
||
@Override | ||
public DateFormatter withLocale(Locale locale) { | ||
// shortcut to avoid creating new objects unnecessarily | ||
if (locale.equals(parser.getLocale())) { | ||
if (locale.equals(locale())) { | ||
return this; | ||
} | ||
|
||
return new JavaDateFormatter(format, printer.withLocale(locale), roundupParser.withLocale(locale), parser.withLocale(locale)); | ||
return new JavaDateFormatter(format, printer.withLocale(locale), | ||
parsers.stream().map(p -> p.withLocale(locale)).toArray(size -> new DateTimeFormatter[size])); | ||
} | ||
|
||
@Override | ||
|
@@ -170,7 +193,7 @@ public ZoneId zone() { | |
|
||
@Override | ||
public DateMathParser toDateMathParser() { | ||
return new JavaDateMathParser(format, parser, roundupParser); | ||
return new JavaDateMathParser(format, this, getRoundupParser()); | ||
} | ||
|
||
@Override | ||
|
@@ -186,12 +209,16 @@ public boolean equals(Object obj) { | |
JavaDateFormatter other = (JavaDateFormatter) obj; | ||
|
||
return Objects.equals(format, other.format) && | ||
Objects.equals(locale(), other.locale()) && | ||
Objects.equals(this.printer.getZone(), other.printer.getZone()); | ||
Objects.equals(locale(), other.locale()) && | ||
Objects.equals(this.printer.getZone(), other.printer.getZone()); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format(Locale.ROOT, "format[%s] locale[%s]", format, locale()); | ||
} | ||
|
||
Collection<DateTimeFormatter> getParsers() { | ||
return parsers; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,6 +35,7 @@ | |
import java.time.temporal.TemporalAdjusters; | ||
import java.time.temporal.TemporalQueries; | ||
import java.util.Objects; | ||
import java.util.function.Function; | ||
import java.util.function.LongSupplier; | ||
|
||
/** | ||
|
@@ -46,11 +47,11 @@ | |
*/ | ||
public class JavaDateMathParser implements DateMathParser { | ||
|
||
private final DateTimeFormatter formatter; | ||
private final JavaDateFormatter formatter; | ||
private final DateTimeFormatter roundUpFormatter; | ||
private final String format; | ||
|
||
JavaDateMathParser(String format, DateTimeFormatter formatter, DateTimeFormatter roundUpFormatter) { | ||
JavaDateMathParser(String format, JavaDateFormatter formatter, DateTimeFormatter roundUpFormatter) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the intention was to allow alternatives in patterns for date math calculations. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually this should stay. Plenty of tests started to fail now because of that.
So if we want to have an efficient parsing of composite patterns in
|
||
this.format = format; | ||
Objects.requireNonNull(formatter); | ||
this.formatter = formatter; | ||
|
@@ -215,20 +216,20 @@ private Instant parseDateTime(String value, ZoneId timeZone, boolean roundUpIfNo | |
throw new ElasticsearchParseException("cannot parse empty date"); | ||
} | ||
|
||
DateTimeFormatter formatter = roundUpIfNoTime ? this.roundUpFormatter : this.formatter; | ||
Function<String,TemporalAccessor> formatter = roundUpIfNoTime ? this.roundUpFormatter::parse : this.formatter::parse; | ||
try { | ||
if (timeZone == null) { | ||
return DateFormatters.from(formatter.parse(value)).toInstant(); | ||
return DateFormatters.from(formatter.apply(value)).toInstant(); | ||
} else { | ||
TemporalAccessor accessor = formatter.parse(value); | ||
TemporalAccessor accessor = formatter.apply(value); | ||
ZoneId zoneId = TemporalQueries.zone().queryFrom(accessor); | ||
if (zoneId != null) { | ||
timeZone = zoneId; | ||
} | ||
|
||
return DateFormatters.from(accessor).withZoneSameLocal(timeZone).toInstant(); | ||
} | ||
} catch (DateTimeParseException e) { | ||
} catch (IllegalArgumentException | DateTimeParseException e) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this still needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not needed as will revert back this class There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually needed - see this #40100 (comment) |
||
throw new ElasticsearchParseException("failed to parse date field [{}] with format [{}]: [{}]", | ||
e, value, format, e.getMessage()); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -343,6 +343,17 @@ public void testDuellingFormatsValidParsing() { | |
assertSameDate("2012-W1-1", "weekyear_week_day"); | ||
} | ||
|
||
public void testCompositeParsing(){ | ||
//in all these examples the second pattern will be used | ||
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SSS"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need some more tests:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Happy to add more test cases. Do you have anything specific in mind? |
||
assertSameDate("2014-06-06T12:01:02.123", "strictDateTimeNoMillis||yyyy-MM-dd'T'HH:mm:ss.SSS"); | ||
assertSameDate("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss+HH:MM||yyyy-MM-dd'T'HH:mm:ss.SSS"); | ||
} | ||
|
||
public void testExceptionWhenCompositeParsingFails(){ | ||
assertParseException("2014-06-06T12:01:02.123", "yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SS"); | ||
} | ||
|
||
public void testDuelingStrictParsing() { | ||
assertSameDate("2018W313", "strict_basic_week_date"); | ||
assertParseException("18W313", "strict_basic_week_date"); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a fan of this `merge` method. It extracts the relatively low-level `java.time.DateTimeFormatter` when it should stick with the `org.elasticsearch.common.time.DateFormatter` abstraction as long as possible.
Possibly the roundUpBuilder should also be used inside JavaDateFormatter constructor?
also I suspect that roundUpBuilder will suffer from the same problem when it is a composite?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe we can refactor this in a separate PR then? Also, this code should only be there temporarily? As soon as we get rid of Joda time in the code base, I expect that we can get rid of quite a few abstractions.
Is it intended to be used as a composite?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is used as a composite in the `merge` method when constructing the roundUpParser with `appendOptional`.
I can imagine we can have the same pattern as in the issue (`yyyy-MM-dd'T'HH:mm:ss||yyyy-MM-dd'T'HH:mm:ss.SSS`) but used with this parser. Will try to come up with a test case.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I discussed that with @spinscale and there is no way we could create a pattern that would suffer from the same problem.
For index name calculations like `prefix-{2010-01-01/d{yyyy-MM-dd||yyyy-MM-ddTHH}}` it would fail parsing, as the only thing expected after `|` is the timezone (it would fail saying unexpected `|`).