diff --git a/lib/commons/text/has-unicode.js b/lib/commons/text/has-unicode.js index 9d10ad6834..a7ac371906 100644 --- a/lib/commons/text/has-unicode.js +++ b/lib/commons/text/has-unicode.js @@ -1,7 +1,8 @@ import { getUnicodeNonBmpRegExp, getSupplementaryPrivateUseRegExp, - getPunctuationRegExp + getPunctuationRegExp, + getCategoryFormatRegExp } from './unicode'; import emojiRegexText from 'emoji-regex'; @@ -20,19 +21,22 @@ import emojiRegexText from 'emoji-regex'; */ function hasUnicode(str, options) { const { emoji, nonBmp, punctuations } = options; + let value = false; + if (emoji) { - return emojiRegexText().test(str); + value ||= emojiRegexText().test(str); } if (nonBmp) { - return ( + value ||= getUnicodeNonBmpRegExp().test(str) || - getSupplementaryPrivateUseRegExp().test(str) - ); + getSupplementaryPrivateUseRegExp().test(str) || + getCategoryFormatRegExp().test(str); } if (punctuations) { - return getPunctuationRegExp().test(str); + value ||= getPunctuationRegExp().test(str); } - return false; + + return value; } export default hasUnicode; diff --git a/lib/commons/text/remove-unicode.js b/lib/commons/text/remove-unicode.js index ddf10ae7ed..4527cfc871 100644 --- a/lib/commons/text/remove-unicode.js +++ b/lib/commons/text/remove-unicode.js @@ -1,7 +1,8 @@ import { getUnicodeNonBmpRegExp, getSupplementaryPrivateUseRegExp, - getPunctuationRegExp + getPunctuationRegExp, + getCategoryFormatRegExp } from './unicode.js'; import emojiRegexText from 'emoji-regex'; @@ -25,8 +26,10 @@ function removeUnicode(str, options) { str = str.replace(emojiRegexText(), ''); } if (nonBmp) { - str = str.replace(getUnicodeNonBmpRegExp(), ''); - str = str.replace(getSupplementaryPrivateUseRegExp(), ''); + str = str + .replace(getUnicodeNonBmpRegExp(), '') + .replace(getSupplementaryPrivateUseRegExp(), '') + .replace(getCategoryFormatRegExp(), ''); } if (punctuations) { str = str.replace(getPunctuationRegExp(), ''); diff --git a/lib/commons/text/unicode.js 
b/lib/commons/text/unicode.js index 9a8fd11901..f76d354cba 100644 --- a/lib/commons/text/unicode.js +++ b/lib/commons/text/unicode.js @@ -83,3 +83,15 @@ export function getSupplementaryPrivateUseRegExp() { // ┏━━━━━━┻━━━━━━┓┏━━━━━━┻━━━━━━┓ return /[\uDB80-\uDBBF][\uDC00-\uDFFF]/g; } + +/** + * Get regular expression for the Unicode "Format" (Cf) general category. + * When we drop IE11 we can instead use the Unicode property escape `/\p{Cf}/gu` + * Reference: + * - https://www.compart.com/en/unicode/category/Cf + * + * @returns {RegExp} + */ +export function getCategoryFormatRegExp() { + return /[\xAD\u0600-\u0605\u061C\u06DD\u070F\u08E2\u180E\u200B-\u200F\u202A-\u202E\u2060-\u2064\u2066-\u206F\uFEFF\uFFF9-\uFFFB]|\uD804[\uDCBD\uDCCD]|\uD80D[\uDC30-\uDC38]|\uD82F[\uDCA0-\uDCA3]|\uD834[\uDD73-\uDD7A]|\uDB40[\uDC01\uDC20-\uDC7F]/g; +} diff --git a/test/commons/text/unicode.js b/test/commons/text/unicode.js index b5d8b2489e..819e01d271 100644 --- a/test/commons/text/unicode.js +++ b/test/commons/text/unicode.js @@ -1,86 +1,94 @@ -describe('text.hasUnicode', function () { - describe('text.hasUnicode, characters of type Non Bi Multilingual Plane', function () { - it('returns false when given string is alphanumeric', function () { - var actual = axe.commons.text.hasUnicode('1 apple', { +describe('text.hasUnicode', () => { + describe('text.hasUnicode, characters of type Non Bi Multilingual Plane', () => { + it('returns false when given string is alphanumeric', () => { + const actual = axe.commons.text.hasUnicode('1 apple', { nonBmp: true }); assert.isFalse(actual); }); - it('returns false when given string is number', function () { - var actual = axe.commons.text.hasUnicode('100', { + it('returns false when given string is number', () => { + const actual = axe.commons.text.hasUnicode('100', { nonBmp: true }); assert.isFalse(actual); }); - it('returns false when given string is a sentence', function () { - var actual = axe.commons.text.hasUnicode('Earth is round', { + it('returns false when given 
string is a sentence', () => { + const actual = axe.commons.text.hasUnicode('Earth is round', { nonBmp: true }); assert.isFalse(actual); }); - it('returns true when given string is a phonetic extension', function () { - var actual = axe.commons.text.hasUnicode('ᴁ', { + it('returns true when given string is a phonetic extension', () => { + const actual = axe.commons.text.hasUnicode('ᴁ', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string is a combining diacritical marks supplement', function () { - var actual = axe.commons.text.hasUnicode('ᴁ', { + it('returns true when given string is a combining diacritical marks supplement', () => { + const actual = axe.commons.text.hasUnicode('ᴁ', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string is a currency symbols', function () { - var actual = axe.commons.text.hasUnicode('₨ 20000', { + it('returns true when given string is a currency symbols', () => { + const actual = axe.commons.text.hasUnicode('₨ 20000', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string has arrows', function () { - var actual = axe.commons.text.hasUnicode('← turn left', { + it('returns true when given string has arrows', () => { + const actual = axe.commons.text.hasUnicode('← turn left', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string has geometric shapes', function () { - var actual = axe.commons.text.hasUnicode('◓', { + it('returns true when given string has geometric shapes', () => { + const actual = axe.commons.text.hasUnicode('◓', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string has math operators', function () { - var actual = axe.commons.text.hasUnicode('√4 = 2', { + it('returns true when given string has math operators', () => { + const actual = axe.commons.text.hasUnicode('√4 = 2', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true when given string has windings font', 
function () { - var actual = axe.commons.text.hasUnicode('▽', { + it('returns true when given string has windings font', () => { + const actual = axe.commons.text.hasUnicode('▽', { nonBmp: true }); assert.isTrue(actual); }); - it('returns true for a string with characters in supplementary private use area A', function () { - var actual = axe.commons.text.hasUnicode('\uDB80\uDFFE', { + it('returns true for a string with characters in supplementary private use area A', () => { + const actual = axe.commons.text.hasUnicode('\uDB80\uDFFE', { + nonBmp: true + }); + assert.isTrue(actual); + }); + + it('returns true when given string has format unicode', () => { + // zero-width space character U+200B + const actual = axe.commons.text.hasUnicode('\u200BHello World', { nonBmp: true }); assert.isTrue(actual); }); }); - describe('text.hasUnicode, characters of type Emoji', function () { - it('returns false when given string is alphanumeric', function () { - var actual = axe.commons.text.hasUnicode( + describe('text.hasUnicode, characters of type Emoji', () => { + it('returns false when given string is alphanumeric', () => { + const actual = axe.commons.text.hasUnicode( '1 apple a day, keeps the doctor away', { emoji: true @@ -89,60 +97,60 @@ describe('text.hasUnicode', function () { assert.isFalse(actual); }); - it('returns false when given string is number', function () { - var actual = axe.commons.text.hasUnicode('100', { + it('returns false when given string is number', () => { + const actual = axe.commons.text.hasUnicode('100', { emoji: true }); assert.isFalse(actual); }); - it('returns false when given string is a sentence', function () { - var actual = axe.commons.text.hasUnicode('Earth is round', { + it('returns false when given string is a sentence', () => { + const actual = axe.commons.text.hasUnicode('Earth is round', { emoji: true }); assert.isFalse(actual); }); - it('returns true when given string has emoji', function () { - var actual = 
axe.commons.text.hasUnicode('🌎 is round', { + it('returns true when given string has emoji', () => { + const actual = axe.commons.text.hasUnicode('🌎 is round', { emoji: true }); assert.isTrue(actual); }); - it('returns true when given string has emoji', function () { - var actual = axe.commons.text.hasUnicode('plant a 🌱', { + it('returns true when given string has emoji', () => { + const actual = axe.commons.text.hasUnicode('plant a 🌱', { emoji: true }); assert.isTrue(actual); }); }); - describe('text.hasUnicode, characters of type punctuations', function () { - it('returns false when given string is number', function () { - var actual = axe.commons.text.hasUnicode('100', { + describe('text.hasUnicode, characters of type punctuations', () => { + it('returns false when given string is number', () => { + const actual = axe.commons.text.hasUnicode('100', { punctuations: true }); assert.isFalse(actual); }); - it('returns false when given string is a sentence', function () { - var actual = axe.commons.text.hasUnicode('Earth is round', { + it('returns false when given string is a sentence', () => { + const actual = axe.commons.text.hasUnicode('Earth is round', { punctuations: true }); assert.isFalse(actual); }); - it('returns true when given string has punctuations', function () { - var actual = axe.commons.text.hasUnicode("What's your name?", { + it('returns true when given string has punctuations', () => { + const actual = axe.commons.text.hasUnicode("What's your name?", { punctuations: true }); assert.isTrue(actual); }); - it('returns true for strings with money signs and odd symbols', function () { + it('returns true for strings with money signs and odd symbols', () => { ['£', '¢', '¥', '€', '§', '±'].forEach(function (str) { - var actual = axe.commons.text.hasUnicode(str, { + const actual = axe.commons.text.hasUnicode(str, { punctuations: true }); assert.isTrue(actual); @@ -150,9 +158,9 @@ describe('text.hasUnicode', function () { }); }); - 
describe('text.hasUnicode, has combination of unicode', function () { - it('returns false when given string is number', function () { - var actual = axe.commons.text.hasUnicode('100', { + describe('text.hasUnicode, has combination of unicode', () => { + it('returns false when given string is number', () => { + const actual = axe.commons.text.hasUnicode('100', { emoji: true, nonBmp: true, punctuations: true @@ -160,8 +168,8 @@ describe('text.hasUnicode', function () { assert.isFalse(actual); }); - it('returns true when given string has unicode characters', function () { - var actual = axe.commons.text.hasUnicode( + it('returns true when given string has unicode characters', () => { + const actual = axe.commons.text.hasUnicode( 'The ☀️ is orange, the ◓ is white.', { emoji: true, @@ -171,61 +179,80 @@ describe('text.hasUnicode', function () { ); assert.isTrue(actual); }); + + it('returns true when given format unicode characters', () => { + // zero-width space character U+200B + const actual = axe.commons.text.hasUnicode('\u200BHello World', { + emoji: true, + nonBmp: true, + punctuations: true + }); + assert.isTrue(actual); + }); + + it('returns true when given punctuation characters', () => { + const actual = axe.commons.text.hasUnicode('Earth!!!', { + emoji: true, + nonBmp: true, + punctuations: true + }); + assert.isTrue(actual); + }); }); }); -describe('text.removeUnicode', function () { - it('returns string by removing non BMP unicode ', function () { - var actual = axe.commons.text.removeUnicode('₨₨20000₨₨', { +describe('text.removeUnicode', () => { + it('returns string by removing non BMP unicode ', () => { + const actual = axe.commons.text.removeUnicode('₨₨20000₨₨', { nonBmp: true }); assert.equal(actual, '20000'); }); - it('returns string by removing emoji unicode ', function () { - var actual = axe.commons.text.removeUnicode('☀️Sun 🌎Earth', { + it('returns string by removing emoji unicode ', () => { + const actual = axe.commons.text.removeUnicode('☀️Sun 
🌎Earth', { emoji: true }); assert.equal(actual, 'Sun Earth'); }); - it('returns string after removing punctuations from word', function () { - var actual = axe.commons.text.removeUnicode('Earth!!!', { + it('returns string after removing punctuations from word', () => { + const actual = axe.commons.text.removeUnicode('Earth!!!', { punctuations: true }); assert.equal(actual, 'Earth'); }); - it('returns string removing all punctuations', function () { - var actual = axe.commons.text.removeUnicode('', { + it('returns string removing all punctuations', () => { + const actual = axe.commons.text.removeUnicode('', { punctuations: true }); assert.equal(actual, ''); }); - it('returns string removing all private use unicode', function () { - var actual = axe.commons.text.removeUnicode('', { + it('returns string removing all private use unicode', () => { + const actual = axe.commons.text.removeUnicode('', { nonBmp: true }); assert.equal(actual, ''); }); - it('returns string removing all supplementary private use unicode', function () { - var actual = axe.commons.text.removeUnicode('󰀀󿰀󿿽󰏽', { + it('returns string removing all supplementary private use unicode', () => { + const actual = axe.commons.text.removeUnicode('󰀀󿰀󿿽󰏽', { nonBmp: true }); assert.equal(actual, ''); }); - it('returns the string with supplementary private use area A characters removed', function () { - var actual = axe.commons.text.removeUnicode('\uDB80\uDFFE', { + it('returns the string with supplementary private use area A characters removed', () => { + const actual = axe.commons.text.removeUnicode('\uDB80\uDFFE', { nonBmp: true }); assert.equal(actual, ''); }); - it('returns string removing combination of unicode characters', function () { - var actual = axe.commons.text.removeUnicode( + it('returns string removing combination of unicode characters', () => { + const actual = axe.commons.text.removeUnicode( 'The ☀️ is orange, the ◓ is white.', { emoji: true, @@ -235,4 +262,12 @@ 
describe('text.removeUnicode', function () { ); assert.equal(actual, 'The is orange the is white'); }); + + it('returns string removing format unicode', () => { + // zero-width space character U+200B + const actual = axe.commons.text.removeUnicode('\u200BHello World', { + nonBmp: true + }); + assert.equal(actual, 'Hello World'); + }); });