From a9a9f7e70091a0201b8a5c9a264fe3faf5e2c833 Mon Sep 17 00:00:00 2001 From: Alessandro Segala Date: Mon, 6 Oct 2014 14:43:57 -0400 Subject: [PATCH 1/5] Patch for validator.normalizeEmail: reject invalid emails, add more flexibility, improved googlemail.com support --- test/client-side.js | 0 test/exports.js | 0 test/sanitizers.js | 41 ++++++++++++++++++++++++++++++++++++----- test/validators.js | 2 ++ validator.js | 39 ++++++++++++++++++++++++++++++++++----- 5 files changed, 72 insertions(+), 10 deletions(-) mode change 100644 => 100755 test/client-side.js mode change 100644 => 100755 test/exports.js mode change 100644 => 100755 test/sanitizers.js mode change 100644 => 100755 test/validators.js mode change 100644 => 100755 validator.js diff --git a/test/client-side.js b/test/client-side.js old mode 100644 new mode 100755 diff --git a/test/exports.js b/test/exports.js old mode 100644 new mode 100755 diff --git a/test/sanitizers.js b/test/sanitizers.js old mode 100644 new mode 100755 index 6a14f4499..1d40efdbb --- a/test/sanitizers.js +++ b/test/sanitizers.js @@ -183,17 +183,48 @@ describe('Sanitizers', function () { test({ sanitizer: 'normalizeEmail' , expect: { - 'some.name@gmail.com': 'somename@gmail.com' + 'test@me.com': 'test@me.com' + , 'some.name@gmail.com': 'somename@gmail.com' , 'some.name@googleMail.com': 'somename@googlemail.com' , 'some.name+extension@gmail.com': 'somename@gmail.com' , 'some.Name+extension@GoogleMail.com': 'somename@googlemail.com' , 'some.name.middleName+extension@gmail.com': 'somenamemiddlename@gmail.com' , 'some.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' - , 'some.name.midd..leNa...me...+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'some.name.midd..leNa...me...+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'some.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' + , 'some.name.midd.leNa.me.+extension@GoogleMail.com': 
'somenamemiddlename@googlemail.com' , 'some.name+extension@unknown.com': 'some.name+extension@unknown.com' - , 'an invalid email address': 'an invalid email address' - , '': '' + , 'hans@m端ller.com': 'hans@m端ller.com' + , 'an invalid email address': false + , '': false + // some.name.midd..leNa...me...+extension@GoogleMail.com was removed from test cases because of a bug with validator.isEmail. See issue #258 + } + }); + test({ + sanitizer: 'normalizeEmail' + , args: [{lowercase: false}] + , expect: { + 'test@me.com': 'test@me.com' + , 'hans@m端ller.com': 'hans@m端ller.com' + , 'test@ME.COM': 'test@me.com' // Hostname is always lowercased + , 'TEST@me.com': 'TEST@me.com' + , 'TEST@ME.COM': 'TEST@me.com' + , 'blAH@x.com': 'blAH@x.com' + + // Domains that are known for being case-insensitive are always lowercased + , 'SOME.name@GMAIL.com': 'somename@gmail.com' + , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' + } + }); + test({ + sanitizer: 'normalizeEmail' + , args: [{googlemail_to_gmail: true}] + , expect: { + 'SOME.name@GMAIL.com': 'somename@gmail.com' + , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' + , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' + , 'test@googlemail.com': 'test@gmail.com' + , 'foo@bar.com': 'foo@bar.com' } }); }); diff --git a/test/validators.js b/test/validators.js old mode 100644 new mode 100755 index 7f4e700e5..9f11a27aa --- a/test/validators.js +++ b/test/validators.js @@ -45,6 +45,8 @@ describe('Validators', function () { , 'hans.m端ller@test.com' , 'hans@m端ller.com' , 'test|123@m端ller.com' + , 'test+ext@gmail.com' + , 'some.name.midd.leNa.me.+extension@GoogleMail.com' ] , invalid: [ 'invalidemail@' diff --git a/validator.js b/validator.js old mode 100644 new mode 100755 index 7a5918fa8..8440e4829 --- a/validator.js +++ b/validator.js @@ -433,14 +433,43 @@ 
validator.blacklist = function (str, chars) { return str.replace(new RegExp('[' + chars + ']+', 'g'), ''); }; - - validator.normalizeEmail = function (email) { - var parts = email.toLowerCase().split('@', 2); + + var default_normalize_email_options = { + // Lowercase the local part for all domains (domains that are known for having case-insensitive local parts are always lowercased, such as gmail.com) + lowercase: true + // Convert googlemail.com to gmail.com + , googlemail_to_gmail: false + }; + + validator.normalizeEmail = function (email, options) { + options = merge(options, default_normalize_email_options); + + // Fail if the email address is invalid + if(!validator.isEmail(email)) { + return false; + } + + var parts = email.split('@', 2); + + // Always lowercase the domain, but the local part only if requested + parts[1] = parts[1].toLowerCase(); + if(options.lowercase) { + parts[0] = parts[0].toLowerCase(); + } + + // gmail.com and googlemail.com if (parts[1] === 'gmail.com' || parts[1] === 'googlemail.com') { + if(!options.lowercase) { // case-insensitive + parts[0] = parts[0].toLowerCase(); + } parts[0] = parts[0].replace(/\./g, '').split('+')[0]; - email = parts.join('@'); + + if(options.googlemail_to_gmail) { + parts[1] = 'gmail.com'; + } } - return email; + + return parts.join('@'); }; function merge(obj, defaults) { From ec0cc3bcdfcd4319fa14ec69cd0078b73c199217 Mon Sep 17 00:00:00 2001 From: Alessandro Segala Date: Mon, 6 Oct 2014 14:49:58 -0400 Subject: [PATCH 2/5] Restored file permissions /(sorry for that) --- test/client-side.js | 0 test/exports.js | 0 test/sanitizers.js | 0 test/validators.js | 0 validator.js | 0 5 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 test/client-side.js mode change 100755 => 100644 test/exports.js mode change 100755 => 100644 test/sanitizers.js mode change 100755 => 100644 test/validators.js mode change 100755 => 100644 validator.js diff --git a/test/client-side.js b/test/client-side.js old 
mode 100755 new mode 100644 diff --git a/test/exports.js b/test/exports.js old mode 100755 new mode 100644 diff --git a/test/sanitizers.js b/test/sanitizers.js old mode 100755 new mode 100644 diff --git a/test/validators.js b/test/validators.js old mode 100755 new mode 100644 diff --git a/validator.js b/validator.js old mode 100755 new mode 100644 From 6ba1c3ed7762312db08b5bd4debc7e4d89ec90dc Mon Sep 17 00:00:00 2001 From: Alessandro Segala Date: Mon, 6 Oct 2014 15:13:00 -0400 Subject: [PATCH 3/5] Updated the README file --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) mode change 100644 => 100755 README.md diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 198adbb1d..66ff97cec --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ $ bower install validator-js - **stripLow(input [, keep_new_lines])** - remove characters with a numerical value < 32 and 127, mostly control characters. If `keep_new_lines` is `true`, newline characters are preserved (`\n` and `\r`, hex `0xA` and `0xD`). Unicode-safe in JavaScript. - **whitelist(input, chars)** - remove characters that do not appear in the whitelist. - **blacklist(input, chars)** - remove characters that appear in the blacklist. -- **normalizeEmail(email)** - canonicalize a gmail address. +- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true, googlemail_to_gmail: false }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). With `googlemail_to_gmail` all `@googlemail.com` addresses are normalized to `@gmail.com`. Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. 
`some.one+tag@gmail.com` becomes `someone@gmail.com`). ### Strings only From 5758f8ab621587b4907357801db3dbc5a595a9e6 Mon Sep 17 00:00:00 2001 From: Alessandro Segala Date: Fri, 10 Oct 2014 10:22:15 -0400 Subject: [PATCH 4/5] normalizeEmail converts googlemail.com to gmail.com by default, as requested --- README.md | 2 +- test/sanitizers.js | 16 ++++++++-------- validator.js | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 66ff97cec..b71fe55af 100755 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ $ bower install validator-js - **stripLow(input [, keep_new_lines])** - remove characters with a numerical value < 32 and 127, mostly control characters. If `keep_new_lines` is `true`, newline characters are preserved (`\n` and `\r`, hex `0xA` and `0xD`). Unicode-safe in JavaScript. - **whitelist(input, chars)** - remove characters that do not appear in the whitelist. - **blacklist(input, chars)** - remove characters that appear in the blacklist. -- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true, googlemail_to_gmail: false }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). With `googlemail_to_gmail` all `@googlemail.com` addresses are normalized to `@gmail.com`. Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`). +- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true, googlemail_to_gmail: true }`. 
With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). With `googlemail_to_gmail` all `@googlemail.com` addresses are normalized to `@gmail.com`. Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`). ### Strings only diff --git a/test/sanitizers.js b/test/sanitizers.js index 1d40efdbb..dfbbc4090 100644 --- a/test/sanitizers.js +++ b/test/sanitizers.js @@ -185,13 +185,13 @@ describe('Sanitizers', function () { , expect: { 'test@me.com': 'test@me.com' , 'some.name@gmail.com': 'somename@gmail.com' - , 'some.name@googleMail.com': 'somename@googlemail.com' + , 'some.name@googleMail.com': 'somename@gmail.com' , 'some.name+extension@gmail.com': 'somename@gmail.com' - , 'some.Name+extension@GoogleMail.com': 'somename@googlemail.com' + , 'some.Name+extension@GoogleMail.com': 'somename@gmail.com' , 'some.name.middleName+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'some.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'some.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' , 'some.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'some.name.midd.leNa.me.+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'some.name.midd.leNa.me.+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' , 'some.name+extension@unknown.com': 'some.name+extension@unknown.com' , 'hans@m端ller.com': 'hans@m端ller.com' , 'an invalid email address': false @@ -212,18 +212,18 @@ describe('Sanitizers', function () { // Domains that are known for being case-insensitive are always lowercased , 'SOME.name@GMAIL.com': 'somename@gmail.com' - , 
'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' } }); test({ sanitizer: 'normalizeEmail' - , args: [{googlemail_to_gmail: true}] + , args: [{googlemail_to_gmail: false}] , expect: { 'SOME.name@GMAIL.com': 'somename@gmail.com' - , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' + , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'test@googlemail.com': 'test@gmail.com' + , 'test@googlemail.com': 'test@googlemail.com' , 'foo@bar.com': 'foo@bar.com' } }); diff --git a/validator.js b/validator.js index 8440e4829..ae193840b 100644 --- a/validator.js +++ b/validator.js @@ -438,7 +438,7 @@ // Lowercase the local part for all domains (domains that are known for having case-insensitive local parts are always lowercased, such as gmail.com) lowercase: true // Convert googlemail.com to gmail.com - , googlemail_to_gmail: false + , googlemail_to_gmail: true }; validator.normalizeEmail = function (email, options) { From 384a2aebe67e9f7adb4a93118a216b6c633c8db1 Mon Sep 17 00:00:00 2001 From: Alessandro Segala Date: Tue, 14 Oct 2014 09:41:58 -0400 Subject: [PATCH 5/5] Updated patch as requested --- README.md | 2 +- test/sanitizers.js | 11 ----------- validator.js | 17 ++++++----------- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index de3833f13..8bb8b4ea4 100755 --- a/README.md +++ b/README.md @@ -83,7 +83,7 @@ $ bower install validator-js - **stripLow(input [, keep_new_lines])** - remove characters with a numerical value < 32 and 127, mostly control characters. If `keep_new_lines` is `true`, newline characters are preserved (`\n` and `\r`, hex `0xA` and `0xD`). Unicode-safe in JavaScript. 
- **whitelist(input, chars)** - remove characters that do not appear in the whitelist. The characters are used in a RegExp and so you will need to escape some chars, e.g. whitelist(input, '\\[\\]'). - **blacklist(input, chars)** - remove characters that appear in the blacklist. The characters are used in a RegExp and so you will need to escape some chars, e.g. blacklist(input, '\\[\\]'). -- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true, googlemail_to_gmail: true }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). With `googlemail_to_gmail` all `@googlemail.com` addresses are normalized to `@gmail.com`. Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`). +- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`) and all `@googlemail.com` addresses are normalized to `@gmail.com`. This library validates and sanitizes **strings** only. 
All input will be coerced to a string using the following rules diff --git a/test/sanitizers.js b/test/sanitizers.js index dfbbc4090..63201f4cd 100644 --- a/test/sanitizers.js +++ b/test/sanitizers.js @@ -216,17 +216,6 @@ describe('Sanitizers', function () { , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' } }); - test({ - sanitizer: 'normalizeEmail' - , args: [{googlemail_to_gmail: false}] - , expect: { - 'SOME.name@GMAIL.com': 'somename@gmail.com' - , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' - , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'test@googlemail.com': 'test@googlemail.com' - , 'foo@bar.com': 'foo@bar.com' - } - }); }); }); diff --git a/validator.js b/validator.js index ae193840b..3d28c4dc9 100644 --- a/validator.js +++ b/validator.js @@ -437,15 +437,13 @@ var default_normalize_email_options = { // Lowercase the local part for all domains (domains that are known for having case-insensitive local parts are always lowercased, such as gmail.com) lowercase: true - // Convert googlemail.com to gmail.com - , googlemail_to_gmail: true }; validator.normalizeEmail = function (email, options) { options = merge(options, default_normalize_email_options); // Fail if the email address is invalid - if(!validator.isEmail(email)) { + if (!validator.isEmail(email)) { return false; } @@ -453,20 +451,17 @@ // Always lowercase the domain, but the local part only if requested parts[1] = parts[1].toLowerCase(); - if(options.lowercase) { + if (options.lowercase) { parts[0] = parts[0].toLowerCase(); } // gmail.com and googlemail.com if (parts[1] === 'gmail.com' || parts[1] === 'googlemail.com') { - if(!options.lowercase) { // case-insensitive - parts[0] = parts[0].toLowerCase(); - } - parts[0] = parts[0].replace(/\./g, '').split('+')[0]; - - if(options.googlemail_to_gmail) { - parts[1] = 'gmail.com'; + if (!options.lowercase) { // case-insensitive + parts[0] = 
parts[0].toLowerCase(); } + parts[0] = parts[0].replace(/\./g, '').split('+')[0]; + parts[1] = 'gmail.com'; // Always replace googlemail.com with gmail.com } return parts.join('@');