Skip to content

Commit

Permalink
Js impl of refine accepted length for ph extensions pr#2526 (#2530)
Browse files Browse the repository at this point in the history
* See PR #2526 for notes.
  • Loading branch information
penmetsaa authored Oct 23, 2020
1 parent 32719cf commit 55b2646
Show file tree
Hide file tree
Showing 2 changed files with 252 additions and 34 deletions.
141 changes: 107 additions & 34 deletions javascript/i18n/phonenumbers/phonenumberutil.js
Original file line number Diff line number Diff line change
Expand Up @@ -744,46 +744,119 @@ i18n.phonenumbers.PhoneNumberUtil.DEFAULT_EXTN_PREFIX_ = ' ext. ';


/**
* Pattern to capture digits used in an extension.
* Places a maximum length of '7' for an extension.
* Helper method for constructing regular expressions for parsing. Creates
* an expression that captures between one and maxLength digits.
*
* @const
* @type {string}
* @return {string} RegEx pattern to capture extension digits.
* @private
*/
i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ =
// A single capturing group matching between 1 and 7 characters drawn from the
// VALID_DIGITS_ character class.
'([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,7})';

/**
 * Builds the regular-expression source for one capturing group that matches
 * between one and maxLength characters from the VALID_DIGITS_ class.
 *
 * @param {string} maxLength Upper bound placed in the '{1,N}' quantifier; the
 *     callers visible in this file pass it as a digit string such as '20'.
 * @return {string} RegEx pattern to capture extension digits.
 * @private
 */
i18n.phonenumbers.PhoneNumberUtil.extnDigits_ = function(maxLength) {
  // Character class listing every character accepted as an extension digit.
  var digitClass = '[' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']';
  // Bounded repetition: at least one digit, at most maxLength.
  var repetition = '{1,' + maxLength + '}';
  // The surrounding parentheses make the digits a capturing group.
  return '(' + digitClass + repetition + ')';
};

/**
* Regexp of all possible ways to write extensions, for use when parsing. This
* will be run as a case-insensitive regexp match. Wide character versions are
* also provided after each ASCII version. There are three regular expressions
* here. The first covers RFC 3966 format, where the extension is added using
* ';ext='. The second more generic one starts with optional white space and
* ends with an optional full stop (.), followed by zero or more spaces/tabs
* /commas and then the numbers themselves. The other one covers the special
* case of American numbers where the extension is written with a hash at the
* end, such as '- 503#'. Note that the only capturing groups should be around
* the digits that you want to capture as part of the extension, or else parsing
* will fail! We allow two options for representing the accented o - the
* character itself, and one in the unicode decomposed form with the combining
* acute accent.
* Helper initialiser method to create the regular-expression pattern to match
* extensions.
*
* @const
* @type {string}
* @return {string} RegEx pattern to capture extensions.
* @private
*/
i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ =
// Alternative 1: the RFC 3966 extension prefix followed by the captured digits
// (1 to 7 of them, per CAPTURING_EXTN_DIGITS_).
i18n.phonenumbers.PhoneNumberUtil.RFC3966_EXTN_PREFIX_ +
i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '|' +
// Alternative 2: optional spaces / no-break spaces / tabs / commas, then an
// extension label in a non-capturing group ...
'[ \u00A0\\t,]*' +
// ... "ext"/"xt"/"extension"-style words (accented o either precomposed or as
// o + combining acute), or the full-width spelling of "(e)xt(n)" ...
'(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|' +
// ... the Cyrillic label U+0434 U+043E U+0431 ...
'\u0434\u043E\u0431|' +
// ... a single marker character (ASCII or full-width), "int", "anexo", or
// full-width "int".
'[;,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' +
// Optional colon or full stop (ASCII or full-width) after the label, then any
// run of spaces / no-break spaces / tabs / commas / hyphens.
'[:\\.\uFF0E]?[ \u00A0\\t,-]*' +
// The captured digits, optionally terminated by '#'.
i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '#?|' +
// Alternative 3: hyphens/spaces, 1 to 5 captured digits, and a mandatory
// trailing '#' (e.g. '- 503#').
'[- ]+([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,5})#';
/**
 * Assembles the regular-expression source used to recognise an extension.
 * Six alternatives are joined with '|'; the maximum number of extension digits
 * each one accepts shrinks as its label becomes more ambiguous. The label
 * fragments built here use non-capturing groups, so within them only the
 * extension digits are captured.
 *
 * @return {string} RegEx pattern to capture extensions.
 * @private
 */
i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_ = function() {
  // Digit caps, chosen by how ambiguous the extension prefix is. As per ITU the
  // officially allowed extension length is 40, but that is not supported here:
  // no real examples have been seen, and because extension labels are not
  // standardised a long cap introduces many false interpretations.
  /** @type {string} */
  var maxDigitsExplicitLabel = '20';
  /** @type {string} */
  var maxDigitsLikelyLabel = '15';
  /** @type {string} */
  var maxDigitsAmbiguousChar = '9';
  /** @type {string} */
  var maxDigitsNotSure = '6';

  /**
   * Capturing group for up to `limit` extension digits.
   * @param {string} limit
   * @return {string}
   */
  var captureDigits = function(limit) {
    return i18n.phonenumbers.PhoneNumberUtil.extnDigits_(limit);
  };

  // Separators allowed between the number and the extension label.
  /** @type {string} */
  var separatorsBeforeLabel = '[ \u00A0\\t,]*';
  // Same separators, minus the comma, for alternatives where the comma is
  // itself (part of) the extension label.
  /** @type {string} */
  var separatorsBeforeLabelNoComma = '[ \u00A0\\t]*';
  // Optional full stop (.) or colon, followed by zero or more
  // spaces/tabs/commas/hyphens.
  /** @type {string} */
  var charsAfterLabel = '[:\\.\uFF0E]?[ \u00A0\\t,-]*';
  // Optional trailing hash after the digits.
  /** @type {string} */
  var optionalHash = '#?';

  // Labels that call out the extension explicitly, e.g. "ext.".
  /** @type {string} */
  var explicitLabels =
      '(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|' +
      '\u0434\u043E\u0431|anexo)';
  // One-character symbols that can indicate an extension, plus less commonly
  // used or more ambiguous labels.
  /** @type {string} */
  var ambiguousLabels = '(?:[x\uFF58#\uFF03~\uFF5E]|int|\uFF49\uFF4E\uFF54)';
  // Separator used when the extension is not clearly set apart.
  /** @type {string} */
  var unclearSeparator = '[- ]+';
  // ",," is commonly used to auto-dial the extension once connected. The
  // semi-colon also works on iPhone and Android, popping up a button with the
  // extension number that follows.
  /** @type {string} */
  var autoDialLabels = '(?:,{2}|;)';

  /** @type {!Array<string>} */
  var alternatives = [
    // 1. RFC 3966 format, where the extension is added using ";ext=".
    i18n.phonenumbers.PhoneNumberUtil.RFC3966_EXTN_PREFIX_ +
        captureDigits(maxDigitsExplicitLabel),

    // 2. Explicit labels such as "ext:". Like the RFC form, this accepts more
    //    digits than any other kind of label.
    separatorsBeforeLabel + explicitLabels + charsAfterLabel +
        captureDigits(maxDigitsExplicitLabel) + optionalHash,

    // 3. Single-character or less common labels. Fewer digits are captured to
    //    reduce the chance of reading two adjacent numbers as
    //    number + extension.
    separatorsBeforeLabel + ambiguousLabels + charsAfterLabel +
        captureDigits(maxDigitsAmbiguousChar) + optionalHash,

    // 4. American style, where the extension is written with a hash at the
    //    end, such as "- 503#".
    unclearSeparator + captureDigits(maxDigitsNotSure) + '#',

    // 5. Auto-dialling formats used when dialling; longer extensions are
    //    accepted here.
    separatorsBeforeLabelNoComma + autoDialLabels + charsAfterLabel +
        captureDigits(maxDigitsLikelyLabel) + optionalHash,

    // 6. Any run of commas acting as the label. This is liberal about the
    //    number of commas, so the digit cap is strict.
    separatorsBeforeLabelNoComma + '(?:,)+' + charsAfterLabel +
        captureDigits(maxDigitsAmbiguousChar) + optionalHash
  ];

  return alternatives.join('|');
};


/**
Expand All @@ -796,7 +869,7 @@ i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ =
*/
i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERN_ =
new RegExp('(?:' +
i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ +
i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_() +
')$', 'i');


Expand All @@ -814,7 +887,7 @@ i18n.phonenumbers.PhoneNumberUtil.VALID_PHONE_NUMBER_PATTERN_ =
i18n.phonenumbers.PhoneNumberUtil.MIN_LENGTH_PHONE_NUMBER_PATTERN_ +
'$|' +
'^' + i18n.phonenumbers.PhoneNumberUtil.VALID_PHONE_NUMBER_ +
'(?:' + i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ +
'(?:' + i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_() +
')?' + '$', 'i');


Expand Down
145 changes: 145 additions & 0 deletions javascript/i18n/phonenumbers/phonenumberutil_test.js
Original file line number Diff line number Diff line change
Expand Up @@ -3552,6 +3552,151 @@ function testParseExtensions() {
phoneUtil.parse('+1 (645) 123 1234 ext. 910#', RegionCode.US)));
}

/**
 * Checks the lower and upper extension-length limits for the RFC 3966 form and
 * for explicit extension labels (PhoneNumberUtil.extLimitAfterExplicitLabel).
 */
function testParseHandlesLongExtensionsWithExplicitLabels() {
  /** @type {!i18n.phonenumbers.PhoneNumber} */
  var expected = new i18n.phonenumbers.PhoneNumber();
  expected.setCountryCode(64);
  expected.setNationalNumber(33316005);

  // Asserts that `input`, parsed for NZ, equals the current `expected` number.
  var assertParsesToExpected = function(input) {
    assertTrue(expected.equals(phoneUtil.parse(input, RegionCode.NZ)));
  };

  // Asserts that parsing `input` for NZ throws an error whose message is
  // `errorType`.
  var assertTooLongRejected = function(input, errorType) {
    try {
      phoneUtil.parse(input, RegionCode.NZ);
      fail(
          'This should not parse as length of extension is higher than ' +
          'allowed: ' + input);
    } catch (e) {
      // Expected this exception.
      assertEquals('Wrong error type stored in exception.', errorType, e.message);
    }
  };

  // RFC format: shortest and longest accepted extensions.
  expected.setExtension('0');
  assertParsesToExpected('tel:+6433316005;ext=0');
  expected.setExtension('01234567890123456789');
  assertParsesToExpected('tel:+6433316005;ext=01234567890123456789');
  // One digit over the limit.
  assertTooLongRejected(
      'tel:+6433316005;ext=012345678901234567890',
      i18n.phonenumbers.Error.NOT_A_NUMBER);

  // Explicit extension labels: shortest accepted extension.
  expected.setExtension('1');
  assertParsesToExpected('03 3316005ext:1');

  // Explicit extension labels: longest accepted extension, across label
  // spellings and separators.
  expected.setExtension('12345678901234567890');
  var longestExtensionInputs = [
    '03 3316005 xtn:12345678901234567890',
    '03 3316005 extension\t12345678901234567890',
    '03 3316005 xtensio:12345678901234567890',
    '03 3316005 xtensión, 12345678901234567890#',
    '03 3316005extension.12345678901234567890',
    '03 3316005 доб:12345678901234567890'
  ];
  for (var i = 0; i < longestExtensionInputs.length; i++) {
    assertParsesToExpected(longestExtensionInputs[i]);
  }

  // One digit over the limit.
  assertTooLongRejected(
      '03 3316005 extension 123456789012345678901',
      i18n.phonenumbers.Error.TOO_LONG);
}

/**
 * Checks auto-dialling and other standard extension labels
 * (PhoneNumberUtil.extLimitAfterLikelyLabel).
 */
function testParseHandlesLongExtensionsWithAutoDiallingLabels() {
  // Builds a PhoneNumber with the given country code, national number and
  // extension, set in that order.
  var buildNumber = function(countryCode, nationalNumber, extension) {
    var number = new i18n.phonenumbers.PhoneNumber();
    number.setCountryCode(countryCode);
    number.setNationalNumber(nationalNumber);
    number.setExtension(extension);
    return number;
  };

  // US number carrying a 15-digit extension, introduced by ",," or ";".
  var expectedUs = buildNumber(1, 2679000000, '123456789012345');
  var usInputs = [
    '+12679000000,,123456789012345#',
    '+12679000000;123456789012345#'
  ];
  for (var i = 0; i < usInputs.length; i++) {
    assertTrue(expectedUs.equals(phoneUtil.parse(usInputs[i], RegionCode.US)));
  }

  // UK number carrying a 9-digit extension, introduced by ",,".
  var expectedUk = buildNumber(44, 2034000000, '123456789');
  assertTrue(expectedUk.equals(
      phoneUtil.parse('+442034000000,,123456789#', RegionCode.GB)));

  // Extension too long: 16 digits after ",,".
  var tooLongInput = '+12679000000,,1234567890123456#';
  try {
    phoneUtil.parse(tooLongInput, RegionCode.US);
    fail(
        'This should not parse as length of extension is higher than ' +
        'allowed: ' + tooLongInput);
  } catch (e) {
    // Expected this exception.
    assertEquals(
        'Wrong error type stored in exception.',
        i18n.phonenumbers.Error.NOT_A_NUMBER, e.message);
  }
}

/**
 * Checks single-character and non-standard extension labels
 * (PhoneNumberUtil.extLimitAfterAmbiguousChar).
 */
function testParseHandlesShortExtensionsWithAmbiguousChar() {
  var expected = new i18n.phonenumbers.PhoneNumber();
  expected.setCountryCode(64);
  expected.setNationalNumber(33316005);
  expected.setExtension('123456789');

  // Nine-digit extensions are accepted after each ambiguous label.
  var acceptedInputs = [
    '03 3316005 x 123456789',
    '03 3316005 x. 123456789',
    '03 3316005 #123456789#',
    '03 3316005 ~ 123456789'
  ];
  for (var i = 0; i < acceptedInputs.length; i++) {
    assertTrue(
        expected.equals(phoneUtil.parse(acceptedInputs[i], RegionCode.NZ)));
  }

  // Extension too long: ten digits after "~".
  var tooLongInput = '03 3316005 ~ 1234567890';
  try {
    phoneUtil.parse(tooLongInput, RegionCode.NZ);
    fail(
        'This should not parse as length of extension is higher than ' +
        'allowed: ' + tooLongInput);
  } catch (e) {
    // Expected this exception.
    assertEquals(
        'Wrong error type stored in exception.',
        i18n.phonenumbers.Error.TOO_LONG, e.message);
  }
}

/**
 * Checks extensions that have no explicit label and are denoted only by a
 * trailing '#' (PhoneNumberUtil.extLimitWhenNotSure).
 */
function testParseHandlesShortExtensionsWhenNotSureOfLabel() {
  var expected = new i18n.phonenumbers.PhoneNumber();
  expected.setCountryCode(1);
  expected.setNationalNumber(1234567890);

  // Each entry is [extension to expect, input to parse], checked in order:
  // first the longest accepted extension, then the shortest.
  var acceptedCases = [
    ['666666', '+1123-456-7890 666666#'],
    ['6', '+11234567890-6#']
  ];
  for (var i = 0; i < acceptedCases.length; i++) {
    expected.setExtension(acceptedCases[i][0]);
    assertTrue(
        expected.equals(phoneUtil.parse(acceptedCases[i][1], RegionCode.US)));
  }

  // Extension too long: seven digits before the trailing '#'.
  var tooLongInput = '+1123-456-7890 7777777#';
  try {
    phoneUtil.parse(tooLongInput, RegionCode.US);
    fail(
        'This should not parse as length of extension is higher than ' +
        'allowed: ' + tooLongInput);
  } catch (e) {
    // Expected this exception.
    assertEquals(
        'Wrong error type stored in exception.',
        i18n.phonenumbers.Error.NOT_A_NUMBER, e.message);
  }
}

function testParseAndKeepRaw() {
var CCS = i18n.phonenumbers.PhoneNumber.CountryCodeSource;
/** @type {!i18n.phonenumbers.PhoneNumber} */
Expand Down

0 comments on commit 55b2646

Please sign in to comment.