Skip to content

Commit

Permalink
Fix paragraph word boundary unicode handling (#541)
Browse files Browse the repository at this point in the history
* Adopt tests for paragraph.getWordBoundary

* Rewrote getWordBoundary and add more TODOs

* Add CodePoint typealias

* More detailed comments and more TODOs

* Add test for no-break space and fix offset==length case

* Address feedback
  • Loading branch information
MatkovIvan authored May 5, 2023
1 parent ab3b980 commit 1682d2f
Show file tree
Hide file tree
Showing 6 changed files with 462 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,35 +15,31 @@
*/
package androidx.compose.ui.text

internal actual fun strongDirectionType(codePoint: Int): StrongDirectionType =
codePoint.getDirectionality().toStrongDirectionType()

internal actual fun Char.isNeutralDirection(): Boolean =
directionality.isNeutralDirection()

/**
* Get the Unicode directionality of a character.
*/
private fun Int.getDirectionality(): CharDirectionality =
CharDirectionality.valueOf(Character.getDirectionality(this).toInt())

/**
* Get strong (R, L or AL) direction type.
* See https://www.unicode.org/reports/tr9/
*/
private fun CharDirectionality.toStrongDirectionType() = when (this) {
CharDirectionality.LEFT_TO_RIGHT -> StrongDirectionType.Ltr
internal actual fun CodePoint.strongDirectionType(): StrongDirectionType =
when (getDirectionality()) {
CharDirectionality.LEFT_TO_RIGHT -> StrongDirectionType.Ltr

CharDirectionality.RIGHT_TO_LEFT,
CharDirectionality.RIGHT_TO_LEFT_ARABIC -> StrongDirectionType.Rtl
CharDirectionality.RIGHT_TO_LEFT,
CharDirectionality.RIGHT_TO_LEFT_ARABIC -> StrongDirectionType.Rtl

else -> StrongDirectionType.None
}
else -> StrongDirectionType.None
}
internal actual fun CodePoint.isNeutralDirection(): Boolean =
when (getDirectionality()) {
CharDirectionality.OTHER_NEUTRALS,
CharDirectionality.WHITESPACE,
CharDirectionality.BOUNDARY_NEUTRAL -> true

private fun CharDirectionality.isNeutralDirection(): Boolean = when (this) {
CharDirectionality.OTHER_NEUTRALS,
CharDirectionality.WHITESPACE,
CharDirectionality.BOUNDARY_NEUTRAL -> true
else -> false
}

else -> false
}
/**
* Get the Unicode directionality of a character.
*/
private fun CodePoint.getDirectionality(): CharDirectionality =
CharDirectionality.valueOf(Character.getDirectionality(this).toInt())
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,25 @@ package androidx.compose.ui.text
import org.jetbrains.skia.icu.CharDirection


internal actual fun strongDirectionType(codePoint: Int): StrongDirectionType =
CharDirection.of(codePoint).toStrongDirectionType()

internal actual fun Char.isNeutralDirection(): Boolean =
CharDirection.of(code).isNeutralDirection()

/**
* Get strong (R, L or AL) direction type.
* See https://www.unicode.org/reports/tr9/
*/
private fun Int.toStrongDirectionType() = when (this) {
CharDirection.LEFT_TO_RIGHT -> StrongDirectionType.Ltr
internal actual fun CodePoint.strongDirectionType(): StrongDirectionType =
when (CharDirection.of(this)) {
CharDirection.LEFT_TO_RIGHT -> StrongDirectionType.Ltr

CharDirection.RIGHT_TO_LEFT,
CharDirection.RIGHT_TO_LEFT_ARABIC -> StrongDirectionType.Rtl
CharDirection.RIGHT_TO_LEFT,
CharDirection.RIGHT_TO_LEFT_ARABIC -> StrongDirectionType.Rtl

else -> StrongDirectionType.None
}
else -> StrongDirectionType.None
}

private fun Int.isNeutralDirection(): Boolean = when (this) {
CharDirection.OTHER_NEUTRAL,
CharDirection.WHITE_SPACE_NEUTRAL,
CharDirection.BOUNDARY_NEUTRAL -> true
internal actual fun CodePoint.isNeutralDirection(): Boolean =
when (CharDirection.of(this)) {
CharDirection.OTHER_NEUTRAL,
CharDirection.WHITE_SPACE_NEUTRAL,
CharDirection.BOUNDARY_NEUTRAL -> true

else -> false
}
else -> false
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,43 @@ internal value class StrongDirectionType private constructor(val value: Int) {
}
}

internal expect fun strongDirectionType(codePoint: Int): StrongDirectionType
// TODO Remove once it's available in common stdlib https://youtrack.jetbrains.com/issue/KT-23251
internal typealias CodePoint = Int

/**
* Converts a surrogate pair to a unicode code point.
*/
private fun Char.Companion.toCodePoint(high: Char, low: Char): CodePoint =
(((high - MIN_HIGH_SURROGATE) shl 10) or (low - MIN_LOW_SURROGATE)) + 0x10000

/**
* The minimum value of a supplementary code point, `\u0x10000`.
*/
private const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000

/**
* The maximum value of a Unicode code point.
*/
private const val MAX_CODE_POINT = 0X10FFFF

internal fun CodePoint.charCount(): Int = if (this >= MIN_SUPPLEMENTARY_CODE_POINT) 2 else 1

/**
* Checks if the codepoint specified is a supplementary codepoint or not.
*/
internal fun CodePoint.isSupplementaryCodePoint(): Boolean =
this in MIN_SUPPLEMENTARY_CODE_POINT..MAX_CODE_POINT

// TODO: Use unicode code point instead of Char
internal expect fun Char.isNeutralDirection(): Boolean
internal expect fun CodePoint.strongDirectionType(): StrongDirectionType
internal expect fun CodePoint.isNeutralDirection(): Boolean

/**
* Determine direction based on the first strong directional character.
* Only considers the characters outside isolate pairs.
*/
internal fun String.firstStrongDirectionType(): StrongDirectionType {
for (codePoint in codePointsOutsideDirectionalIsolate) {
return when (val strongDirectionType = strongDirectionType(codePoint)) {
return when (val strongDirectionType = codePoint.strongDirectionType()) {
StrongDirectionType.None -> continue
else -> strongDirectionType
}
Expand Down Expand Up @@ -89,7 +114,7 @@ private val String.codePointsOutsideDirectionalIsolate get() = sequence {
}
}

private val String.codePoints get() = sequence {
internal val String.codePoints get() = sequence {
var index = 0
while (index < length) {
val codePoint = codePointAt(index)
Expand All @@ -98,8 +123,10 @@ private val String.codePoints get() = sequence {
}
}

// TODO Remove once it's available in common stdlib https://youtrack.jetbrains.com/issue/KT-23251
private fun String.codePointAt(index: Int): Int {
/**
* Returns the character (Unicode code point) at the specified index.
*/
internal fun String.codePointAt(index: Int): CodePoint {
val high = this[index]
if (high.isHighSurrogate() && index + 1 < this.length) {
val low = this[index + 1]
Expand All @@ -111,15 +138,15 @@ private fun String.codePointAt(index: Int): Int {
}

/**
* Converts a surrogate pair to a unicode code point.
* Returns the character (Unicode code point) before the specified index.
*/
private fun Char.Companion.toCodePoint(high: Char, low: Char): Int =
(((high - MIN_HIGH_SURROGATE) shl 10) or (low - MIN_LOW_SURROGATE)) + 0x10000

/**
* The minimum value of a supplementary code point, `\u0x10000`.
*/
private const val MIN_SUPPLEMENTARY_CODE_POINT: Int = 0x10000

// TODO Remove once it's available in common stdlib https://youtrack.jetbrains.com/issue/KT-23251
private fun Int.charCount(): Int = if (this >= MIN_SUPPLEMENTARY_CODE_POINT) 2 else 1
internal fun String.codePointBefore(index: Int): CodePoint {
val low = this[index]
if (low.isLowSurrogate() && index - 1 >= 0) {
val high = this[index - 1]
if (high.isHighSurrogate()) {
return Char.toCodePoint(high, low)
}
}
return low.code
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import androidx.compose.ui.text.style.ResolvedTextDirection
import androidx.compose.ui.text.style.TextDecoration
import androidx.compose.ui.unit.Constraints
import kotlin.math.floor
import org.jetbrains.skia.IRange
import org.jetbrains.skia.paragraph.*

internal class SkiaParagraph(
Expand Down Expand Up @@ -165,6 +166,7 @@ internal class SkiaParagraph(
} ?: 0f

private fun lineMetricsForOffset(offset: Int): LineMetrics? {
checkOffsetIsValid(offset)
val metrics = lineMetrics
for (line in metrics) {
if (offset < line.endIncludingNewline) {
Expand Down Expand Up @@ -209,8 +211,7 @@ internal class SkiaParagraph(
override fun isLineEllipsized(lineIndex: Int) = false

override fun getLineForOffset(offset: Int) =
lineMetricsForOffset(offset)?.run { lineNumber.toInt() }
?: 0
lineMetricsForOffset(offset)?.lineNumber ?: 0

override fun getLineForVerticalPosition(vertical: Float): Int {
return getLineMetricsForVerticalPosition(vertical)?.lineNumber ?: 0
Expand Down Expand Up @@ -266,7 +267,8 @@ internal class SkiaParagraph(
}

private fun getBoxForwardByOffset(offset: Int): TextBox? {
var to = offset + 1
checkOffsetIsValid(offset)
var to = offset + 1 // TODO: Use unicode code points (CodePoint.charCount() instead of +1)
while (to <= text.length) {
val box = paragraph.getRectsForRange(
offset, to,
Expand All @@ -275,12 +277,13 @@ internal class SkiaParagraph(
if (box != null) {
return box
}
to += 1
to += 1 // TODO: Use unicode code points (CodePoint.charCount() instead of +1)
}
return null
}

private fun getBoxBackwardByOffset(offset: Int, end: Int = offset): TextBox? {
checkOffsetIsValid(offset)
var from = offset - 1
val isRtl = paragraphIntrinsics.textDirection == ResolvedTextDirection.Rtl
while (from >= 0) {
Expand Down Expand Up @@ -312,6 +315,7 @@ internal class SkiaParagraph(
val rect = SkRect(width, box.rect.bottom, width, bottom)
TextBox(rect, box.direction)
} else {
// TODO: Use unicode code points (CodePoint.charCount() instead of +1)
val nextBox = paragraph.getRectsForRange(
offset, offset + 1,
RectHeightMode.STRUT, RectWidthMode.TIGHT
Expand All @@ -331,6 +335,7 @@ internal class SkiaParagraph(
}

override fun getParagraphDirection(offset: Int): ResolvedTextDirection =
// TODO: It should be position based (direction isolate for example)
paragraphIntrinsics.textDirection

override fun getBidiRunDirection(offset: Int): ResolvedTextDirection =
Expand Down Expand Up @@ -394,12 +399,13 @@ internal class SkiaParagraph(
correctedGlyphPosition = paragraph.getGlyphPositionAtCoordinate(leftX + 1f, position.y).position
} else if (position.x >= rightX) { // when clicked to the right of a text line
correctedGlyphPosition = paragraph.getGlyphPositionAtCoordinate(rightX - 1f, position.y).position
val isNeutralChar = if (correctedGlyphPosition in text.indices) {
text.codePointAt(correctedGlyphPosition).isNeutralDirection()
} else false

// TODO: Use unicode code points
val isNeutralChar = text.getOrNull(correctedGlyphPosition)?.isNeutralDirection() ?: false
// For RTL blocks, the position is still not correct, so we have to subtract 1 from the returned result
if (!isNeutralChar && getBoxBackwardByOffset(correctedGlyphPosition)?.direction == Direction.RTL) {
correctedGlyphPosition -= 1
correctedGlyphPosition -= 1 // TODO Check if it should be CodePoint.charCount()
}
}

Expand All @@ -412,16 +418,29 @@ internal class SkiaParagraph(
}

override fun getWordBoundary(offset: Int): TextRange {
return when {
(text.getOrNull(offset)?.isLetterOrDigit() ?: false) -> paragraph.getWordBoundary(offset).let {
TextRange(it.start, it.end)
}
(text.getOrNull(offset - 1)?.isLetterOrDigit() ?: false) ->
paragraph.getWordBoundary(offset - 1).let {
TextRange(it.start, it.end)
}
else -> TextRange(offset, offset)
checkOffsetIsValid(offset)

// To match with Android implementation it should return:
// - empty range if offset is between spaces
// - previous word if offset right after that
// - TODO: punctuation doesn't divide words
// NOTE: Android punctuation handling has some issues at the moment, so it doesn't clear
// what expected behavior is.

// Android uses `isLetterOrDigit` for codepoints, but we have it only for chars.
// Using `Char.isWhitespace` instead because whitespaces are not supplementary code units.
// TODO: Replace chars to code units to make this code future proof.
if (offset < text.length && text[offset].isWhitespace() || offset == text.length) {
// If it's whitespace, we're sure that it's not surrogate.
return if (offset > 0 && !text[offset - 1].isWhitespace()) {
paragraph.getWordBoundary(offset - 1).toTextRange()
} else TextRange(offset, offset)
}

// Skia paragraph should handle unicode correctly, but it doesn't match Android behavior.
// It uses ICU's BreakIterator under the hood, so we can use
// `BreakIterator.makeWordInstance()` without calling skia's `getWordBoundary` at all.
return paragraph.getWordBoundary(offset).toTextRange()
}

override fun paint(
Expand Down Expand Up @@ -493,4 +512,15 @@ internal class SkiaParagraph(
}
paragraph.paint(canvas.nativeCanvas, 0.0f, 0.0f)
}

/**
* Check if the given offset is in the given range.
*/
private inline fun checkOffsetIsValid(offset: Int) {
require(offset in 0..text.length) {
("Invalid offset: $offset. Valid range is [0, ${text.length}]")
}
}
}

private fun IRange.toTextRange() = TextRange(start, end)
Loading

0 comments on commit 1682d2f

Please sign in to comment.