diff --git a/.eslintrc.js b/.eslintrc.js index caeeca403d20..e26a2ba95b38 100644 --- a/.eslintrc.js +++ b/.eslintrc.js @@ -70,6 +70,11 @@ module.exports = { name: "matrix-react-sdk/", message: "Please use matrix-react-sdk/src/index instead", }, + { + name: "emojibase-regex", + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], patterns: [ { @@ -138,6 +143,11 @@ module.exports = { ], message: "Please use matrix-js-sdk/src/matrix instead", }, + { + group: ["emojibase-regex/emoji*"], + message: + "This regex doesn't actually test for emoji. See the docs at https://emojibase.dev/docs/regex/ and prefer our own EMOJI_REGEX from HtmlUtils.", + }, ], }, ], diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index b63ed1dcf0ce..0ad3477d821f 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -20,7 +20,6 @@ limitations under the License. import React, { LegacyRef, ReactNode } from "react"; import sanitizeHtml from "sanitize-html"; import classNames from "classnames"; -import EMOJIBASE_REGEX from "emojibase-regex"; import katex from "katex"; import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/matrix"; @@ -46,10 +45,17 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; const SYMBOL_PATTERN = /([\u2100-\u2bff])/; // Regex pattern for non-emoji characters that can appear in an "all-emoji" message -// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace) -const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g; +// (Zero-Width Space, other whitespace) +const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g; -const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, "i"); +// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional +// emoji presentation VS (U+FE0F), but not those sequences that are followed by +// a text presentation VS (U+FE0E). 
Technically this produces false negatives +// for emoji followed by U+FE0E when the emoji doesn't have a text variant, but +// in practice this doesn't matter. +export const EMOJI_REGEX = /\p{RGI_Emoji}(?!\uFE0E)(?:(? { expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`); }); + it("generates big emoji for emoji made of multiple characters", () => { + const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + it("should generate big emoji for an emoji-only reply to a message", () => { const { asFragment } = render( bodyToHtml( @@ -132,6 +138,12 @@ describe("bodyToHtml", () => { expect(asFragment()).toMatchSnapshot(); }); + it("does not mistake characters in text presentation mode for emoji", () => { + const { asFragment } = render(bodyToHtml({ body: "↔", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + describe("feature_latex_maths", () => { beforeEach(() => { jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths"); diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap index c33cc46433d3..03de209307fe 100644 --- a/test/__snapshots__/HtmlUtils-test.tsx.snap +++ b/test/__snapshots__/HtmlUtils-test.tsx.snap @@ -1,5 +1,16 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP +exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = ` + + + ↔ + + +`; + exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"

hello

world
"`; @@ -8,6 +19,29 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hel exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ξ\\xi world"`; +exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = ` + + + + 👨‍👩‍👧‍👦 + + + + ↔️ + + + +`; + exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `