Skip to content

Commit

Permalink
Fix using optional between numbers; forbid duplicate and empty stri…
Browse files Browse the repository at this point in the history
…ngs in tries (#362)

Fix not being able to parse some separator-free formatsу

The reproducer was:
```
offsetHours(Padding.NONE)
optional {
    optional { char(':') }
    offsetMinutesOfHour()
}
```

This showed us one bug and one inefficiency, both of which are
fixed here.

Inefficiency: the `optional { char(':') }` should for all intents
and purposes be equivalent to
`alternativeParsing({ char(':') }) { }`: both the empty string and
the `:` should be parsed, and, according to `optional`'s
definition, if none of the fields mentioned in the block are
non-zero, nothing should be output. However, such `optional` still
created a fake parser element that notified all the fields that
they are zero when an empty string is parsed, even though there are
no fields.

Bug: the fake parser element that notifies the fields that they
are zero even though nothing of note was parsed interfered with the
mechanism supporting compact formats. Number parsers are greedy,
so `number(hh); number(mm)` gets merged into `number(hhmm)`.
If there is something between them, this merge can't happen:
`number(hh); string("x"); number(mm)`.
For this reason, parsers that don't accept any strings are
forbidden (or just very unlikely to happen)--except for the
zero-width unconditional modification parser. This bug is fixed by
moving such parsers to the end of the parser:
`number(hh); modification(); number(mm); string("x")` will get
transformed to `number(hhmm); string("x"); modification()`.

To further improve the robustness of the parser, we revisited other
places where zero-width parsers were possible and forbade them:
now, AM/PM markers, month names, and day-of-week names can't
be empty. This limitation can be lifted at a later point, but it wouldn't
be easy.

While we were at it, we also require distinct month names,
day-of-week names, and AM/PM markers, just because we were
near that place in the code.
  • Loading branch information
dkhalanskyjb authored Apr 29, 2024
1 parent ce30554 commit e721269
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 15 deletions.
3 changes: 3 additions & 0 deletions core/common/src/format/DateTimeFormatBuilder.kt
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ public sealed interface DateTimeFormatBuilder {
*
* [am] is used for the AM marker (0-11 hours), [pm] is used for the PM marker (12-23 hours).
*
* Empty strings can not be used as markers.
* [IllegalArgumentException] is thrown if either [am] or [pm] is empty or if they are equal.
*
* @see [amPmHour]
*/
public fun amPmMarker(am: String, pm: String)
Expand Down
20 changes: 20 additions & 0 deletions core/common/src/format/LocalDateFormat.kt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import kotlinx.datetime.internal.format.parser.Copyable

/**
* A description of how month names are formatted.
*
* An [IllegalArgumentException] will be thrown if some month name is empty or there are duplicate names.
*/
public class MonthNames(
/**
Expand All @@ -21,6 +23,14 @@ public class MonthNames(
) {
init {
require(names.size == 12) { "Month names must contain exactly 12 elements" }
names.indices.forEach { ix ->
require(names[ix].isNotEmpty()) { "A month name can not be empty" }
for (ix2 in 0 until ix) {
require(names[ix] != names[ix2]) {
"Month names must be unique, but '${names[ix]}' was repeated"
}
}
}
}

/**
Expand Down Expand Up @@ -63,6 +73,8 @@ internal fun MonthNames.toKotlinCode(): String = when (this.names) {

/**
* A description of how day of week names are formatted.
*
* An [IllegalArgumentException] will be thrown if some day-of-week name is empty or there are duplicate names.
*/
public class DayOfWeekNames(
/**
Expand All @@ -72,6 +84,14 @@ public class DayOfWeekNames(
) {
init {
require(names.size == 7) { "Day of week names must contain exactly 7 elements" }
names.indices.forEach { ix ->
require(names[ix].isNotEmpty()) { "A day-of-week name can not be empty" }
for (ix2 in 0 until ix) {
require(names[ix] != names[ix2]) {
"Day-of-week names must be unique, but '${names[ix]}' was repeated"
}
}
}
}

/**
Expand Down
30 changes: 19 additions & 11 deletions core/common/src/internal/format/FormatStructure.kt
Original file line number Diff line number Diff line change
Expand Up @@ -160,13 +160,17 @@ internal class OptionalFormatStructure<in T>(
listOf(
ConstantFormatStructure<T>(onZero).parser(),
ParserStructure(
listOf(
UnconditionalModification {
for (field in fields) {
field.assignDefault(it)
if (fields.isEmpty()) {
emptyList()
} else {
listOf(
UnconditionalModification {
for (field in fields) {
field.assignDefault(it)
}
}
}
),
)
},
emptyList()
)
).concat()
Expand All @@ -176,12 +180,16 @@ internal class OptionalFormatStructure<in T>(
override fun formatter(): FormatterStructure<T> {
val formatter = format.formatter()
val predicate = conjunctionPredicate(fields.map { it.isDefaultComparisonPredicate() })
return ConditionalFormatter(
listOf(
predicate::test to ConstantStringFormatterStructure(onZero),
Truth::test to formatter
return if (predicate is Truth) {
ConstantStringFormatterStructure(onZero)
} else {
ConditionalFormatter(
listOf(
predicate::test to ConstantStringFormatterStructure(onZero),
Truth::test to formatter
)
)
)
}
}

private class PropertyWithDefault<in T, E> private constructor(
Expand Down
15 changes: 11 additions & 4 deletions core/common/src/internal/format/parser/Parser.kt
Original file line number Diff line number Diff line change
Expand Up @@ -49,17 +49,21 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
ParserStructure(operations, followedBy.map { it.append(other) })
}

fun <T> ParserStructure<T>.simplify(): ParserStructure<T> {
fun <T> ParserStructure<T>.simplify(unconditionalModifications: List<UnconditionalModification<T>>): ParserStructure<T> {
val newOperations = mutableListOf<ParserOperation<T>>()
var currentNumberSpan: MutableList<NumberConsumer<T>>? = null
// joining together the number consumers in this parser before the first alternative
val unconditionalModificationsForTails = unconditionalModifications.toMutableList()
// joining together the number consumers in this parser before the first alternative;
// collecting the unconditional modifications to push them to the end of all the parser's branches.
for (op in operations) {
if (op is NumberSpanParserOperation) {
if (currentNumberSpan != null) {
currentNumberSpan.addAll(op.consumers)
} else {
currentNumberSpan = op.consumers.toMutableList()
}
} else if (op is UnconditionalModification) {
unconditionalModificationsForTails.add(op)
} else {
if (currentNumberSpan != null) {
newOperations.add(NumberSpanParserOperation(currentNumberSpan))
Expand All @@ -69,7 +73,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
}
}
val mergedTails = followedBy.flatMap {
val simplified = it.simplify()
val simplified = it.simplify(unconditionalModificationsForTails)
// parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
// unless `p` is empty. For example, ((a|b)|(c|d)) is equivalent to (a|b|c|d).
// As a special case, `ParserStructure(emptyList(), emptyList())` represents a parser that recognizes an empty
Expand All @@ -78,6 +82,9 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
simplified.followedBy.ifEmpty { listOf(simplified) }
else
listOf(simplified)
}.ifEmpty {
// preserving the invariant that `mergedTails` contains all unconditional modifications
listOf(ParserStructure(unconditionalModificationsForTails, emptyList()))
}
return if (currentNumberSpan == null) {
// the last operation was not a number span, or it was a number span that we are allowed to interrupt
Expand Down Expand Up @@ -115,7 +122,7 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
}
}
val naiveParser = foldRight(ParserStructure<T>(emptyList(), emptyList())) { parser, acc -> parser.append(acc) }
return naiveParser.simplify()
return naiveParser.simplify(emptyList())
}

internal interface Copyable<Self> {
Expand Down
2 changes: 2 additions & 0 deletions core/common/src/internal/format/parser/ParserOperation.kt
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ internal class StringSetParserOperation<Output>(

init {
for (string in strings) {
require(string.isNotEmpty()) { "Found an empty string in $whatThisExpects" }
var node = trie
for (char in string) {
val searchResult = node.children.binarySearchBy(char.toString()) { it.first }
Expand All @@ -166,6 +167,7 @@ internal class StringSetParserOperation<Output>(
node.children[searchResult].second
}
}
require(!node.isTerminal) { "The string '$string' was passed several times" }
node.isTerminal = true
}
fun reduceTrie(trie: TrieNode) {
Expand Down
12 changes: 12 additions & 0 deletions core/common/test/format/DateTimeFormatTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,18 @@ class DateTimeFormatTest {
}
}
}

@Test
fun testOptionalBetweenConsecutiveNumbers() {
val format = UtcOffset.Format {
offsetHours(Padding.NONE)
optional {
optional { offsetSecondsOfMinute() }
offsetMinutesOfHour()
}
}
assertEquals(UtcOffset(-7, -30), format.parse("-730"))
}
}

fun <T> DateTimeFormat<T>.assertCanNotParse(input: String) {
Expand Down
32 changes: 32 additions & 0 deletions core/common/test/format/LocalDateFormatTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,38 @@ class LocalDateFormatTest {
assertEquals("2020 Jan 05", format.format(LocalDate(2020, 1, 5)))
}

@Test
fun testEmptyMonthNames() {
val names = MonthNames.ENGLISH_FULL.names
for (i in 0 until 12) {
val newNames = (0 until 12).map { if (it == i) "" else names[it] }
assertFailsWith<IllegalArgumentException> { MonthNames(newNames) }
}
}

@Test
fun testEmptyDayOfWeekNames() {
val names = DayOfWeekNames.ENGLISH_FULL.names
for (i in 0 until 7) {
val newNames = (0 until 7).map { if (it == i) "" else names[it] }
assertFailsWith<IllegalArgumentException> { DayOfWeekNames(newNames) }
}
}

@Test
fun testIdenticalMonthNames() {
assertFailsWith<IllegalArgumentException> {
MonthNames("Jan", "Jan", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
}
}

@Test
fun testIdenticalDayOfWeekNames() {
assertFailsWith<IllegalArgumentException> {
DayOfWeekNames("Mon", "Tue", "Tue", "Thu", "Fri", "Sat", "Sun")
}
}

private fun test(strings: Map<LocalDate, Pair<String, Set<String>>>, format: DateTimeFormat<LocalDate>) {
for ((date, stringsForDate) in strings) {
val (canonicalString, otherStrings) = stringsForDate
Expand Down
18 changes: 18 additions & 0 deletions core/common/test/format/LocalTimeFormatTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,24 @@ class LocalTimeFormatTest {
}
}

@Test
fun testEmptyAmPmMarkers() {
assertFailsWith<IllegalArgumentException> {
LocalTime.Format {
amPmMarker("", "pm")
}
}
}

@Test
fun testIdenticalAmPmMarkers() {
assertFailsWith<IllegalArgumentException> {
LocalTime.Format {
amPmMarker("pm", "pm")
}
}
}

private fun test(strings: Map<LocalTime, Pair<String, Set<String>>>, format: DateTimeFormat<LocalTime>) {
for ((date, stringsForDate) in strings) {
val (canonicalString, otherStrings) = stringsForDate
Expand Down

0 comments on commit e721269

Please sign in to comment.