From 8a0a1a0565291836cb4561acd08f39ca5ec1dda8 Mon Sep 17 00:00:00 2001
From: Gaurav Kale
Date: Thu, 22 Feb 2024 15:20:11 +0800
Subject: [PATCH] Take surrogate pairs into account for caretPositions

---
Review notes (this section is not part of the commit message):
The trailing caret fill now slices `text` up to `text.length`, matching the
range its `charCount` is computed from; the earlier revision sliced up to
`charIndex` at that point.
Line breaks and indentation were restored from a whitespace-collapsed copy of
this patch; if context lines fail to match, retry with
`git apply --ignore-whitespace`.
The `index` hashes below were carried over unchanged and predate the
correction described above.

 packages/troika-three-text/src/Typesetter.js | 49 ++++++++++++++++----
 1 file changed, 40 insertions(+), 9 deletions(-)

diff --git a/packages/troika-three-text/src/Typesetter.js b/packages/troika-three-text/src/Typesetter.js
index a5c1389d..95f9d4c2 100644
--- a/packages/troika-three-text/src/Typesetter.js
+++ b/packages/troika-three-text/src/Typesetter.js
@@ -105,6 +105,10 @@ export function createTypesetter(resolveFonts, bidi) {
   // In the future we may consider a full Unicode line breaking algorithm impl: https://www.unicode.org/reports/tr14
   const BREAK_AFTER_CHARS = new RegExp(`${lineBreakingWhiteSpace}|[\\-\\u007C\\u00AD\\u2010\\u2012-\\u2014\\u2027\\u2056\\u2E17\\u2E40]`)

+  // glyphs that start with a code point within the surrogate range should be treated as surrogate pairs
+  const HIGH_SURROGATE_START = 0xd800;
+  const HIGH_SURROGATE_END = 0xdbff;
+
   /**
    * Load and parse all the necessary fonts to render a given string of text, then group
    * them into consecutive runs of characters sharing a font.
@@ -517,13 +521,17 @@ export function createTypesetter(resolveFonts, bidi) {
                 caretPositions[charIndex * 4 + 2] = line.baseline + fontData.caretBottom + anchorYOffset //common bottom y
                 caretPositions[charIndex * 4 + 3] = line.baseline + fontData.caretTop + anchorYOffset //common top y

-                // If we skipped any chars from the previous glyph (due to ligature subs), fill in caret
-                // positions for those missing char indices; currently this uses a best-guess by dividing
+                // If we skipped any chars from the previous glyph (due to ligature subs/surrogates), fill in caret
+                // positions for those missing char indices; currently ligatures use a best-guess by dividing
                 // the ligature's width evenly. In the future we may try to use the font's LigatureCaretList
                 // table to get better interior caret positions.
-                const ligCount = charIndex - prevCharIndex
-                if (ligCount > 1) {
-                  fillLigatureCaretPositions(caretPositions, prevCharIndex, ligCount)
+                const charCount = charIndex - prevCharIndex
+                if (charCount > 1) {
+                  if (isSurrogate(text.slice(prevCharIndex, charIndex))) {
+                    fillSurrogateCaretPositions(caretPositions, prevCharIndex, charCount)
+                  } else {
+                    fillLigatureCaretPositions(caretPositions, prevCharIndex, charCount)
+                  }
                 }
                 prevCharIndex = charIndex
               }
@@ -597,11 +605,15 @@ export function createTypesetter(resolveFonts, bidi) {
           }
         })

-        // Fill in remaining caret positions in case the final character was a ligature
+        // Fill in remaining caret positions in case the final character was a ligature/surrogate
         if (caretPositions) {
-          const ligCount = text.length - prevCharIndex;
-          if (ligCount > 1) {
-            fillLigatureCaretPositions(caretPositions, prevCharIndex, ligCount)
+          const charCount = text.length - prevCharIndex
+          if (charCount > 1) {
+            if (isSurrogate(text.slice(prevCharIndex, text.length))) {
+              fillSurrogateCaretPositions(caretPositions, prevCharIndex, charCount)
+            } else {
+              fillLigatureCaretPositions(caretPositions, prevCharIndex, charCount)
+            }
           }
         }
       }
@@ -677,6 +689,25 @@ export function createTypesetter(resolveFonts, bidi) {
     }
   }

+  function fillSurrogateCaretPositions(caretPositions, charStartIndex, charCount) {
+    const charStartX = caretPositions[charStartIndex * 4]
+    const charEndX = caretPositions[charStartIndex * 4 + 1]
+    const charBottom = caretPositions[charStartIndex * 4 + 2]
+    const charTop = caretPositions[charStartIndex * 4 + 3]
+    for (let i = 0; i < charCount; i++) {
+      const startIndex = (charStartIndex + i) * 4
+      caretPositions[startIndex] = charStartX;
+      caretPositions[startIndex + 1] = charEndX;
+      caretPositions[startIndex + 2] = charBottom
+      caretPositions[startIndex + 3] = charTop
+    }
+  }
+
+  function isSurrogate(text) {
+    const firstCodeUnit = text.charCodeAt(0);
+    return firstCodeUnit >= HIGH_SURROGATE_START && firstCodeUnit <= HIGH_SURROGATE_END;
+  }
+
   function now() {
     return (self.performance || Date).now()
   }