From b34fae7880ebfbbff16dc274000b8781e81c3ad0 Mon Sep 17 00:00:00 2001 From: Dan Koller <57103678+dan-koller@users.noreply.github.com> Date: Tue, 13 Aug 2024 10:08:25 +0200 Subject: [PATCH] chore: Fixed a bug that treated every string after a header field as a valid email address --- mapy/utils.py | 26 ++++++++++++++++---------- tests/test_utils.py | 4 ++++ 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/mapy/utils.py b/mapy/utils.py index e3d4805..733fa6b 100644 --- a/mapy/utils.py +++ b/mapy/utils.py @@ -6,6 +6,7 @@ from email.parser import HeaderParser from email import message_from_string from email.message import Message +from email.utils import parseaddr import dateutil.parser import pygal @@ -71,23 +72,28 @@ def parse_date(line: str) -> datetime: def get_header_value(h: str, data: str, rex: str = r'\s*(.*?)(?:\n\S+:|$)') -> str | None: """ This function takes a header name and the email header data and - returns the value of the header. - - Note: Changed regex from r'\s*(.*?)\n\S+:\s' to r'\s*(.*?)(?:\n\S+:|$)', - to handle headers with no trailing newline. If there are errors, revert. + returns the value of the header. For 'from', 'to', or 'cc' headers, + it validates if the value is a valid email address. 
:param h: The header name :param data: The email header data :param rex: The regular expression pattern for matching the header value - :return: The value of the header or None if not found + :return: The value of the header or None if not found or invalid email """ - # Use regular expressions to find header values - r = re.findall('%s:%s' % (h, rex), data, re.X | re.DOTALL | re.I) - if r: - return r[0].strip() - else: + matches = re.findall(f'{h}:{rex}', data, re.X | re.DOTALL | re.I) + + if not matches: return None + + header_value = matches[0].strip() + + if h.lower() not in {'from', 'to', 'cc'}: + return header_value + + _, email = parseaddr(header_value) + + return header_value if re.match(r'^[^@]+@[^@]+\.[^@]+$', email) else None def parse_received_headers(mail_data: str) -> list: diff --git a/tests/test_utils.py b/tests/test_utils.py index 3a4a43b..5f113df 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -97,6 +97,10 @@ def test_get_header_value(): # Test non-existing header assert get_header_value('Cc', header_data) is None + # Test a header that contains an invalid cc: address + header_data = "Cc: <@example.com:" + assert get_header_value('Cc', header_data) is None + def test_parse_received_headers(): received_headers = parse_received_headers(mail_data)