Skip to content

Commit

Permalink
NFC-normalize display names per UTS #39
Browse files Browse the repository at this point in the history
  • Loading branch information
JoshData committed Nov 25, 2024
1 parent bc08faa commit 8043de4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 5 deletions.
4 changes: 2 additions & 2 deletions email_validator/syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ def validate_email_local_part(local: str, allow_smtputf8: bool = True, allow_emp
valid = "dot-atom"
requires_smtputf8 = True

# There are no syntactic restrictions on quoted local parts, so if
# it was originally quoted, it is probably valid. More characters
# There are no dot-atom syntax restrictions on quoted local parts, so
# if it was originally quoted, it is probably valid. More characters
# are allowed, like @-signs, spaces, and quotes, and there are no
# restrictions on the placement of dots, as in dot-atom local parts.
elif quoted_local_part:
Expand Down
17 changes: 17 additions & 0 deletions email_validator/validate_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ def validate_email(
display_name, local_part, domain_part, is_quoted_local_part \
= split_email(email)

if display_name:
# UTS #39 3.3 Email Security Profiles for Identifiers requires
# display names (incorrectly called "quoted-string-part" there)
# to be NFC normalized. Since these are not a part of what we
# are really validating, we won't check that the input was NFC
# normalized, but we'll normalize in output.
display_name = unicodedata.normalize("NFC", display_name)

# Collect return values in this instance.
ret = ValidatedEmail()
ret.original = ((local_part if not is_quoted_local_part
Expand All @@ -95,6 +103,15 @@ def validate_email(
# RFC 6532 section 3.1 says that Unicode NFC normalization should be applied,
# so we'll return the NFC-normalized local part. Since the caller may use that
# string in place of the original string, ensure it is also valid.
#
# UTS #39 3.3 Email Security Profiles for Identifiers requires local parts
# to be NFKC normalized, which loses some information in characters that can
# be decomposed. We might want to consider applying NFKC normalization, but
# we can't make the change easily because it would break database lookups
# for any caller that stored a normalized address from a previous version of
# this library. (UTS #39 seems to require that the *input* be NFKC normalized
# and has other requirements that are hard to check without additional Unicode
# data, and I don't know whether the rules really apply in the wild.)
normalized_local_part = unicodedata.normalize("NFC", ret.local_part)
if normalized_local_part != ret.local_part:
try:
Expand Down
7 changes: 4 additions & 3 deletions tests/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,13 +295,14 @@ def test_email_valid(email_input: str, output: ValidatedEmail) -> None:
),
),
(
's\u0323\u0307@nfc.tld',
'\"s\u0323\u0307\" <s\u0323\u0307@nfc.tld>',
MakeValidatedEmail(
local_part='\u1E69',
smtputf8=True,
ascii_domain='nfc.tld',
domain='nfc.tld',
normalized='\u1E69@nfc.tld',
display_name='\u1E69'
),
),
(
Expand All @@ -318,11 +319,11 @@ def test_email_valid(email_input: str, output: ValidatedEmail) -> None:
)
def test_email_valid_intl_local_part(email_input: str, output: ValidatedEmail) -> None:
# Check that it passes when allow_smtputf8 is True.
assert validate_email(email_input, check_deliverability=False) == output
assert validate_email(email_input, check_deliverability=False, allow_display_name=True) == output

# Check that it fails when allow_smtputf8 is False.
with pytest.raises(EmailSyntaxError) as exc_info:
validate_email(email_input, allow_smtputf8=False, check_deliverability=False)
validate_email(email_input, allow_smtputf8=False, check_deliverability=False, allow_display_name=True)
assert "Internationalized characters before the @-sign are not supported: " in str(exc_info.value)


Expand Down

0 comments on commit 8043de4

Please sign in to comment.