Skip to content

Commit

Permalink
fix: handling of characters outside the BMP (@fehmer) (#5911)
Browse files Browse the repository at this point in the history
Handle multi-byte characters outside the [basic multilingual
plane](https://en.wikipedia.org/wiki/Plane_(Unicode)) correctly.

Fixes #5906
  • Loading branch information
fehmer authored Sep 25, 2024
1 parent 6bf1cb8 commit f9409e3
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 21 deletions.
12 changes: 12 additions & 0 deletions frontend/__tests__/utils/strings.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import * as Strings from "../../src/ts/utils/strings";

// Specs for the helpers exported from utils/strings.
describe("string utils", () => {
  // splitIntoCharacters must yield one array entry per character,
  // including characters that need two UTF-16 code units.
  describe("splitIntoCharacters", () => {
    it("splits regular characters", () => {
      const actual = Strings.splitIntoCharacters("abc");
      const expected = ["a", "b", "c"];
      expect(actual).toEqual(expected);
    });

    it("splits characters outside of the bmp", () => {
      // The middle character lies outside the Basic Multilingual Plane.
      const actual = Strings.splitIntoCharacters("t𐑩e");
      const expected = ["t", "𐑩", "e"];
      expect(actual).toEqual(expected);
    });
  });
});
5 changes: 3 additions & 2 deletions frontend/src/ts/test/caret.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import * as TestState from "../test/test-state";
import * as TestWords from "./test-words";
import { prefersReducedMotion } from "../utils/misc";
import { convertRemToPixels } from "../utils/numbers";
import { splitIntoCharacters } from "../utils/strings";

export let caretAnimating = true;
const caret = document.querySelector("#caret") as HTMLElement;
Expand Down Expand Up @@ -133,8 +134,8 @@ export async function updatePosition(noAnim = false): Promise<void> {
Config.caretStyle
);

let wordLen = TestWords.words.getCurrent().length;
const inputLen = TestInput.input.current.length;
let wordLen = splitIntoCharacters(TestWords.words.getCurrent()).length;
const inputLen = splitIntoCharacters(TestInput.input.current).length;
if (Config.mode === "zen") wordLen = inputLen;
const activeWordEl = document?.querySelector("#words .active") as HTMLElement;
//insert temporary character so the caret will work in zen mode
Expand Down
43 changes: 24 additions & 19 deletions frontend/src/ts/test/test-ui.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,14 @@ function createHintsHtml(
activeWordLetters: NodeListOf<Element>,
inputWord: string
): string {
const inputChars = Strings.splitIntoCharacters(inputWord);
let hintsHtml = "";
for (const adjacentLetters of incorrectLtrIndices) {
for (const indx of adjacentLetters) {
const blockLeft = (activeWordLetters[indx] as HTMLElement).offsetLeft;
const blockWidth = (activeWordLetters[indx] as HTMLElement).offsetWidth;
const blockIndices = `[${indx}]`;
const blockChars = inputWord[indx];
const blockChars = inputChars[indx];

hintsHtml +=
`<hint data-length=1 data-chars-index=${blockIndices}` +
Expand Down Expand Up @@ -332,16 +333,17 @@ function getWordHTML(word: string): string {
const funbox = FunboxList.get(Config.funbox).find(
(f) => f.functions?.getWordHtml
);
for (let c = 0; c < word.length; c++) {
const chars = Strings.splitIntoCharacters(word);
for (const char of chars) {
if (funbox?.functions?.getWordHtml) {
retval += funbox.functions.getWordHtml(word.charAt(c), true);
} else if (word.charAt(c) === "\t") {
retval += funbox.functions.getWordHtml(char, true);
} else if (char === "\t") {
retval += `<letter class='tabChar'><i class="fas fa-long-arrow-alt-right fa-fw"></i></letter>`;
} else if (word.charAt(c) === "\n") {
} else if (char === "\n") {
newlineafter = true;
retval += `<letter class='nlChar'><i class="fas fa-level-down-alt fa-rotate-90 fa-fw"></i></letter>`;
} else {
retval += "<letter>" + word.charAt(c) + "</letter>";
retval += "<letter>" + char + "</letter>";
}
}
retval += "</div>";
Expand Down Expand Up @@ -833,10 +835,12 @@ export async function updateActiveWordLetters(
(f) => f.functions?.getWordHtml
);

for (let i = 0; i < input.length; i++) {
const charCorrect = currentWord[i] === input[i];
const inputChars = Strings.splitIntoCharacters(input);
const currentWordChars = Strings.splitIntoCharacters(currentWord);
for (let i = 0; i < inputChars.length; i++) {
const charCorrect = currentWordChars[i] === inputChars[i];

let currentLetter = currentWord[i] as string;
let currentLetter = currentWordChars[i] as string;
let tabChar = "";
let nlChar = "";
if (funbox?.functions?.getWordHtml) {
Expand All @@ -862,13 +866,13 @@ export async function updateActiveWordLetters(
) {
ret += `<letter class="dead">${
Config.indicateTypos === "replace"
? input[i] === " "
? inputChars[i] === " "
? "_"
: input[i]
: inputChars[i]
: currentLetter
}</letter>`;
} else if (currentLetter === undefined) {
let letter = input[i];
let letter = inputChars[i];
if (letter === " " || letter === "\t" || letter === "\n") {
letter = "_";
}
Expand All @@ -877,9 +881,9 @@ export async function updateActiveWordLetters(
ret +=
`<letter class="incorrect ${tabChar}${nlChar}">` +
(Config.indicateTypos === "replace"
? input[i] === " "
? inputChars[i] === " "
? "_"
: input[i]
: inputChars[i]
: currentLetter) +
"</letter>";
if (Config.indicateTypos === "below") {
Expand All @@ -893,15 +897,16 @@ export async function updateActiveWordLetters(
}
}

for (let i = input.length; i < currentWord.length; i++) {
for (let i = inputChars.length; i < currentWordChars.length; i++) {
const currentLetter = currentWordChars[i];
if (funbox?.functions?.getWordHtml) {
ret += funbox.functions.getWordHtml(currentWord[i] as string, true);
} else if (currentWord[i] === "\t") {
ret += funbox.functions.getWordHtml(currentLetter as string, true);
} else if (currentLetter === "\t") {
ret += `<letter class='tabChar'><i class="fas fa-long-arrow-alt-right fa-fw"></i></letter>`;
} else if (currentWord[i] === "\n") {
} else if (currentLetter === "\n") {
ret += `<letter class='nlChar'><i class="fas fa-level-down-alt fa-rotate-90 fa-fw"></i></letter>`;
} else {
ret += `<letter>` + currentWord[i] + "</letter>";
ret += `<letter>` + currentLetter + "</letter>";
}
}
}
Expand Down
15 changes: 15 additions & 0 deletions frontend/src/ts/utils/strings.ts
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,18 @@ export function cleanTypographySymbols(textToClean: string): string {
(char) => specials[char as keyof typeof specials] || ""
);
}

/**
 * Break a string into its individual characters (Unicode code points).
 * Characters outside of the [Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode))
 * occupy two UTF-16 code units, so `string.length` and `string[index]` would
 * count and slice them incorrectly; this helper keeps each of them in one piece.
 * @param s string to be tokenized into characters
 * @returns array of characters, one entry per code point, in original order
 */
export function splitIntoCharacters(s: string): string[] {
  // Array.from consumes the string iterator, which walks code points rather
  // than UTF-16 code units, so surrogate pairs are never split apart.
  return Array.from(s);
}

0 comments on commit f9409e3

Please sign in to comment.