diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 9cf15ea8c..8bb8b4ea4 --- a/README.md +++ b/README.md @@ -83,9 +83,7 @@ $ bower install validator-js - **stripLow(input [, keep_new_lines])** - remove characters with a numerical value < 32 and 127, mostly control characters. If `keep_new_lines` is `true`, newline characters are preserved (`\n` and `\r`, hex `0xA` and `0xD`). Unicode-safe in JavaScript. - **whitelist(input, chars)** - remove characters that do not appear in the whitelist. The characters are used in a RegExp and so you will need to escape some chars, e.g. whitelist(input, '\\[\\]'). - **blacklist(input, chars)** - remove characters that appear in the blacklist. The characters are used in a RegExp and so you will need to escape some chars, e.g. blacklist(input, '\\[\\]'). -- **normalizeEmail(email)** - canonicalize a gmail address. - -### Strings only +- **normalizeEmail(email [, options])** - canonicalize an email address. Returns `false` if the input is not a valid email address (as determined by `isEmail`). `options` is an object which defaults to `{ lowercase: true }`. The hostname is always lowercased. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; with `lowercase` set to `false`, the local part is lowercased only for hosts that are known to be case-insensitive (currently only Gmail). Normalization follows special rules for known providers: currently, Gmail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`) and all `@googlemail.com` addresses are normalized to `@gmail.com`. This library validates and sanitizes **strings** only. 
All input will be coerced to a string using the following rules diff --git a/test/sanitizers.js b/test/sanitizers.js index 6a14f4499..63201f4cd 100644 --- a/test/sanitizers.js +++ b/test/sanitizers.js @@ -183,17 +183,37 @@ describe('Sanitizers', function () { test({ sanitizer: 'normalizeEmail' , expect: { - 'some.name@gmail.com': 'somename@gmail.com' - , 'some.name@googleMail.com': 'somename@googlemail.com' + 'test@me.com': 'test@me.com' + , 'some.name@gmail.com': 'somename@gmail.com' + , 'some.name@googleMail.com': 'somename@gmail.com' , 'some.name+extension@gmail.com': 'somename@gmail.com' - , 'some.Name+extension@GoogleMail.com': 'somename@googlemail.com' + , 'some.Name+extension@GoogleMail.com': 'somename@gmail.com' , 'some.name.middleName+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'some.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' - , 'some.name.midd..leNa...me...+extension@gmail.com': 'somenamemiddlename@gmail.com' - , 'some.name.midd..leNa...me...+extension@GoogleMail.com': 'somenamemiddlename@googlemail.com' + , 'some.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' + , 'some.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' + , 'some.name.midd.leNa.me.+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' , 'some.name+extension@unknown.com': 'some.name+extension@unknown.com' - , 'an invalid email address': 'an invalid email address' - , '': '' + , 'hans@m端ller.com': 'hans@m端ller.com' + , 'an invalid email address': false + , '': false + // some.name.midd..leNa...me...+extension@GoogleMail.com was removed from test cases because of a bug with validator.isEmail. 
See issue #258 + } + }); + test({ + sanitizer: 'normalizeEmail' + , args: [{lowercase: false}] + , expect: { + 'test@me.com': 'test@me.com' + , 'hans@m端ller.com': 'hans@m端ller.com' + , 'test@ME.COM': 'test@me.com' // Hostname is always lowercased + , 'TEST@me.com': 'TEST@me.com' + , 'TEST@ME.COM': 'TEST@me.com' + , 'blAH@x.com': 'blAH@x.com' + + // Domains that are known for being case-insensitive are always lowercased + , 'SOME.name@GMAIL.com': 'somename@gmail.com' + , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' + , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' } }); }); diff --git a/test/validators.js b/test/validators.js index 7f4e700e5..9f11a27aa 100644 --- a/test/validators.js +++ b/test/validators.js @@ -45,6 +45,8 @@ describe('Validators', function () { , 'hans.m端ller@test.com' , 'hans@m端ller.com' , 'test|123@m端ller.com' + , 'test+ext@gmail.com' + , 'some.name.midd.leNa.me.+extension@GoogleMail.com' ] , invalid: [ 'invalidemail@' diff --git a/validator.js b/validator.js index 7a5918fa8..3d28c4dc9 100644 --- a/validator.js +++ b/validator.js @@ -433,14 +433,38 @@ validator.blacklist = function (str, chars) { return str.replace(new RegExp('[' + chars + ']+', 'g'), ''); }; + + var default_normalize_email_options = { + // Lowercase the local part for all domains (domains that are known for having case-insensitive local parts are always lowercased, such as gmail.com) + lowercase: true + }; - validator.normalizeEmail = function (email) { - var parts = email.toLowerCase().split('@', 2); + validator.normalizeEmail = function (email, options) { + options = merge(options, default_normalize_email_options); + + // Fail if the email address is invalid + if (!validator.isEmail(email)) { + return false; + } + + var parts = email.split('@', 2); + + // Always lowercase the domain, but the local part only if requested + parts[1] = parts[1].toLowerCase(); + if (options.lowercase) { + parts[0] = 
parts[0].toLowerCase(); + } + + // gmail.com and googlemail.com if (parts[1] === 'gmail.com' || parts[1] === 'googlemail.com') { + if (!options.lowercase) { // Gmail local parts are case-insensitive, so lowercase them even when the option is off + parts[0] = parts[0].toLowerCase(); + } parts[0] = parts[0].replace(/\./g, '').split('+')[0]; - email = parts.join('@'); + parts[1] = 'gmail.com'; // Always normalize googlemail.com to gmail.com } - return email; + + return parts.join('@'); }; function merge(obj, defaults) {