From 75e48cd5d47c2854a3b7fcf17bd48f78c06b7e92 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Fri, 20 Dec 2024 16:43:44 +0000 Subject: [PATCH 01/38] Remove unused mail service functions --- mail/libraries/mailbox_service.py | 38 --------- mail/libraries/routing_controller.py | 3 +- mail/tests/test_mail_service.py | 118 --------------------------- 3 files changed, 1 insertion(+), 158 deletions(-) diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index 06c4584c..1e5d4e1c 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -155,44 +155,6 @@ def mark_status(status): yield mail_message, mark_status -def read_last_message(pop3_connection: POP3_SSL) -> EmailMessageDto: - _, mails, _ = pop3_connection.list() - message_id, message_num = get_message_id(pop3_connection, mails[-1]) - - try: - message = pop3_connection.retr(message_num) - except error_proto as err: - raise Exception( - f"Unable to RETR message num {message_num} with Message-ID {message_id}", - ) from err - - return to_mail_message_dto(message) - - -def read_last_three_emails(pop3connection: POP3_SSL) -> list: - _, mails, _ = pop3connection.list() - - reversed_mails = list(reversed(mails)) - last_three_mails = reversed_mails[:3] - - message_ids = [get_message_id(pop3connection, line.decode(settings.DEFAULT_ENCODING)) for line in last_three_mails] - - emails = [] - for message_id, message_num in message_ids: - try: - emails.append(pop3connection.retr(message_num)) - except error_proto as err: - raise Exception( - f"Unable to RETR message num {message_num} with Message-ID {message_id}", - ) from err - - email_message_dtos = [] - for email in emails: - email_message_dtos.append(to_mail_message_dto(email)) - - return email_message_dtos - - def find_mail_of(extract_types: List[str], reception_status: str) -> Mail or None: try: mail = Mail.objects.get(status=reception_status, extract_type__in=extract_types) diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index 5d4b1adf..555a788c 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -177,7 +177,7 @@ def send(email_message_dto: EmailMessageDto): def _collect_and_send(mail: Mail): - from mail.celery_tasks import send_email_task, finalise_sending_spire_licence_details + from mail.celery_tasks import finalise_sending_spire_licence_details, send_email_task logger.info("Sending Mail [%s] of extract type %s", mail.id, mail.extract_type) @@ -214,7 +214,6 @@ def get_email_message_dtos(server: MailServer, number: Optional[int] = 3) -> Lis emails = list(islice(emails_iter, number)) else: emails = list(emails_iter) - # emails = read_last_three_emails(pop3_connection) server.quit_pop3_connection() return emails diff --git a/mail/tests/test_mail_service.py b/mail/tests/test_mail_service.py index 75b85f5f..29e1cb56 100644 --- a/mail/tests/test_mail_service.py +++ b/mail/tests/test_mail_service.py @@ -1,128 +1,10 @@ -from collections import OrderedDict from poplib import POP3_SSL from unittest.mock import MagicMock, Mock, patch from django.test import SimpleTestCase -from parameterized import parameterized from mail.auth import Authenticator -from mail.libraries.mailbox_service import read_last_message, read_last_three_emails from mail.servers import MailServer -from mail.tests.libraries.client import LiteHMRCTestClient - - -class MailServiceTests(LiteHMRCTestClient): - @parameterized.expand( - [ - ( - [b"1 1234"], - {b"1": [b"OK", [b"Subject: mock", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], "\r\n.\r\n"]}, - ), - ( - [b"0 1234"], - {b"0": [b"OK", [b"Subject: mock", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], "\r\n.\r\n"]}, - ), - ( - [b"0 1234", b"1 4321"], - { - b"0": [b"OK", [b"Subject: mock", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], "\r\n.\r\n"], - b"1": [b"OK", [b"Subject: mock", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], "\r\n.\r\n"], - }, - ), - ] - ) - def test_read_last_message(self, email_list, retr_data): - pop3conn = MagicMock(spec=POP3_SSL) - pop3conn.list.return_value = (None, email_list, None) - pop3conn.retr = MagicMock(side_effect=retr_data.__getitem__) - message = read_last_message(pop3conn) - self.assertEqual(message.subject, "mock") - message_id = email_list[-1].split()[0] - pop3conn.retr.assert_called_once_with(message_id) - - @parameterized.expand( - [ - ( - [b"0 1234", b"1 4321"], - OrderedDict( - { - "0": [ - b"OK", - [b"Subject: mock0", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "1": [ - b"OK", - [b"Subject: mock1", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - } - ), - ), - ( - [b"0 1234", b"1 4321", b"4 4444"], - OrderedDict( - { - "0": [ - b"OK", - [b"Subject: mock0", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "1": [ - b"OK", - [b"Subject: mock1", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "4": [ - b"OK", - [b"Subject: mock4", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - } - ), - ), - ( - [b"2 1234", b"1 4321", b"4 4444", b"5 5555"], - OrderedDict( - { - "2": [ - b"OK", - [b"Subject: mock2", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "1": [ - b"OK", - [b"Subject: mock1", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "4": [ - b"OK", - [b"Subject: mock4", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - "5": [ - b"OK", - [b"Subject: mock5", b"Date: Mon, 17 May 2021 14:20:18 +0100", b"hello"], - "\r\n.\r\n", - ], - } - ), - ), - ] - ) - def test_read_last_three_emails(self, email_list, retr_data): - pop3conn = MagicMock(spec=POP3_SSL) - pop3conn.list.return_value = (None, email_list, None) - pop3conn.retr = MagicMock(side_effect=retr_data.__getitem__) - message_list = read_last_three_emails(pop3conn) - - # check it only gets up to 3 messages - self.assertEqual(len(message_list), min(len(email_list), 3)) - - # check they are the last 3 messages (reverse input order and take first 3) - message_list_and_expected_source = zip(message_list, reversed(retr_data.values())) - for message, retr_item in message_list_and_expected_source: - self.assertEqual(f"Subject: {message.subject}".encode("utf-8"), retr_item[1][0]) class MailServerTests(SimpleTestCase): From 99e116d92ee94a83c023acf5540a2549accb5639 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Fri, 20 Dec 2024 16:46:07 +0000 Subject: [PATCH 02/38] Rename test file to match file under test --- mail/tests/test_mail_service.py | 89 --------------------------------- mail/tests/test_servers.py | 88 +++++++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 91 deletions(-) delete mode 100644 mail/tests/test_mail_service.py diff --git a/mail/tests/test_mail_service.py b/mail/tests/test_mail_service.py deleted file mode 100644 index 29e1cb56..00000000 --- a/mail/tests/test_mail_service.py +++ /dev/null @@ -1,89 +0,0 @@ -from poplib import POP3_SSL -from unittest.mock import MagicMock, Mock, patch - -from django.test import SimpleTestCase - -from mail.auth import Authenticator -from mail.servers import MailServer - - -class MailServerTests(SimpleTestCase): - def test_mail_server_equal(self): - auth = Mock(spec=Authenticator) - - m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec - m2 = MailServer(auth, hostname="host", pop3_port=1) # nosec - - self.assertEqual(m1, m2) - - def test_mail_server_not_equal(self): - auth = Mock(spec=Authenticator) - - m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec - m2 = MailServer(auth, hostname="host", pop3_port=2) # nosec - - self.assertNotEqual(m1, m2) - - auth = Mock(spec=Authenticator) - - m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec - m2 = Mock() # nosec - - self.assertNotEqual(m1, m2) - - def test_mail_server_connect_to_pop3(self): - hostname = "host" - pop3_port = 1 - - auth = Mock(spec=Authenticator) - pop3conn = MagicMock(spec=POP3_SSL) - - with patch("mail.servers.poplib") as mock_poplib: - mock_poplib.POP3_SSL = pop3conn - - mail_server = MailServer( - auth, - hostname=hostname, - pop3_port=pop3_port, - ) - mail_server.connect_to_pop3() - - pop3conn.assert_called_with( - hostname, - pop3_port, - timeout=60, - ) - - mock_connection = pop3conn() - auth.authenticate.assert_called_with(mock_connection) - - def test_mail_server_quit_pop3_connection(self): - hostname = "host" - pop3_port = 1 - - auth = Mock(spec=Authenticator) - pop3conn = MagicMock(spec=POP3_SSL) - - with patch("mail.servers.poplib") as mock_poplib: - mock_poplib.POP3_SSL = pop3conn - - mail_server = MailServer( - auth, - hostname=hostname, - pop3_port=pop3_port, - ) - mail_server.connect_to_pop3() - mail_server.quit_pop3_connection() - - mock_connection = pop3conn() - mock_connection.quit.assert_called_once() - - def test_mail_server_user(self): - auth = Mock(spec=Authenticator) - auth.user = Mock() - mail_server = MailServer( - auth, - hostname="host", - pop3_port=1, - ) - self.assertEqual(mail_server.user, auth.user) diff --git a/mail/tests/test_servers.py b/mail/tests/test_servers.py index 5e9854b5..f4427542 100644 --- a/mail/tests/test_servers.py +++ b/mail/tests/test_servers.py @@ -1,8 +1,92 @@ -from unittest.mock import MagicMock, patch +from poplib import POP3_SSL +from unittest.mock import MagicMock, Mock, patch from django.test import SimpleTestCase, override_settings -from mail.servers import smtp_send +from mail.auth import Authenticator +from mail.servers import MailServer, smtp_send + + +class MailServerTests(SimpleTestCase): + def test_mail_server_equal(self): + auth = Mock(spec=Authenticator) + + m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec + m2 = MailServer(auth, hostname="host", pop3_port=1) # nosec + + self.assertEqual(m1, m2) + + def test_mail_server_not_equal(self): + auth = Mock(spec=Authenticator) + + m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec + m2 = MailServer(auth, hostname="host", pop3_port=2) # nosec + + self.assertNotEqual(m1, m2) + + auth = Mock(spec=Authenticator) + + m1 = MailServer(auth, hostname="host", pop3_port=1) # nosec + m2 = Mock() # nosec + + self.assertNotEqual(m1, m2) + + def test_mail_server_connect_to_pop3(self): + hostname = "host" + pop3_port = 1 + + auth = Mock(spec=Authenticator) + pop3conn = MagicMock(spec=POP3_SSL) + + with patch("mail.servers.poplib") as mock_poplib: + mock_poplib.POP3_SSL = pop3conn + + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + mail_server.connect_to_pop3() + + pop3conn.assert_called_with( + hostname, + pop3_port, + timeout=60, + ) + + mock_connection = pop3conn() + auth.authenticate.assert_called_with(mock_connection) + + def test_mail_server_quit_pop3_connection(self): + hostname = "host" + pop3_port = 1 + + auth = Mock(spec=Authenticator) + pop3conn = MagicMock(spec=POP3_SSL) + + with patch("mail.servers.poplib") as mock_poplib: + mock_poplib.POP3_SSL = pop3conn + + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + mail_server.connect_to_pop3() + mail_server.quit_pop3_connection() + + mock_connection = pop3conn() + mock_connection.quit.assert_called_once() + + def test_mail_server_user(self): + auth = Mock(spec=Authenticator) + auth.user = Mock() + mail_server = MailServer( + auth, + hostname="host", + pop3_port=1, + ) + self.assertEqual(mail_server.user, auth.user) @override_settings( From b53bc4a2d49bece0f3976cbd42ee3569f2a5d709 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sat, 21 Dec 2024 16:07:01 +0000 Subject: [PATCH 03/38] Use context manager for managing pop3 connection --- healthcheck/checks.py | 5 +- mail/icms/__init__.py | 0 mail/icms/tasks.py | 239 -------- mail/libraries/routing_controller.py | 13 +- .../management/commands/mark_mails_as_read.py | 17 +- mail/servers.py | 24 +- mail/tests/icms/__init__.py | 0 mail/tests/icms/test_tasks.py | 515 ------------------ mail/tests/test_dtos.py | 12 +- mail/tests/test_servers.py | 109 +++- mail/utils/pop3.py | 15 - mock_hmrc/handler.py | 38 +- 12 files changed, 145 insertions(+), 842 deletions(-) delete mode 100644 mail/icms/__init__.py delete mode 100644 mail/icms/tasks.py delete mode 100644 mail/tests/icms/__init__.py delete mode 100644 mail/tests/icms/test_tasks.py diff --git a/healthcheck/checks.py b/healthcheck/checks.py index d109dc6b..bc3eb9df 100644 --- a/healthcheck/checks.py +++ b/healthcheck/checks.py @@ -23,13 +23,12 @@ def check_status(self): for mailserver_factory in mailserver_factories: mailserver = mailserver_factory() try: - mailserver.connect_to_pop3() + with mailserver.connect_to_pop3(): + pass except poplib.error_proto as e: response, *_ = e.args error_message = f"Failed to connect to mailbox: {mailserver.hostname} ({response})" self.add_error(HealthCheckException(error_message)) - finally: - mailserver.quit_pop3_connection() class LicencePayloadsHealthCheck(BaseHealthCheckBackend): diff --git a/mail/icms/__init__.py b/mail/icms/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mail/icms/tasks.py b/mail/icms/tasks.py deleted file mode 100644 index 21ae4651..00000000 --- a/mail/icms/tasks.py +++ /dev/null @@ -1,239 +0,0 @@ -"""ICMS tasks that get periodically ran via django management commands.""" - -import email -import logging -from email.headerregistry import Address, UniqueAddressHeader -from typing import Any, Dict -from urllib import parse - -import requests -from django.conf import settings -from django.db import transaction -from django.utils import timezone - -from mail import requests as mail_requests -from mail.auth import Authenticator, BasicAuthentication -from mail.chief.licence_reply import LicenceReplyProcessor -from mail.enums import ExtractTypeEnum, ReceptionStatusEnum -from mail.models import LicenceData, Mail -from mail.utils import pop3 - -logger = logging.getLogger(__name__) - - -def process_licence_reply_and_usage_emails(): - """Downloads licenceReply and usageData emails from HMRC mailbox and stores in Mail model.""" - - logger.info("Checking for reply and usage emails") - - auth = _get_hmrc_mailbox_auth() - mailbox_hostname = settings.INCOMING_EMAIL_HOSTNAME - port = settings.INCOMING_EMAIL_POP3_PORT - - with transaction.atomic(), pop3.get_connection(auth, mailbox_hostname, port) as con: - try: - for msg_id in pop3.list_messages_ids(con): - logger.debug("Processing msg_id %s", msg_id) - - mail = pop3.get_email(con, msg_id) - - _check_sender_valid( - mail, - expected_sender_domain=settings.HMRC_TO_DIT_EMAIL_HOSTNAME, - expected_sender_user=settings.HMRC_TO_DIT_EMAIL_USER, - ) - - subject = mail.get("Subject") - logger.debug("Subject of email being processed: %s", subject) - - if "licenceReply" in subject: - _save_licence_reply_email(mail) - con.dele(msg_id) - - elif "usageData" in subject: - _save_usage_data_email(mail) - con.dele(msg_id) - else: - raise ValueError(f"Unable to process email with subject: {subject}") - - except Exception as e: - con.rset() - raise e - - -@transaction.atomic() -def send_licence_data_to_icms(): - """Checks Mail model for any licence reply records to send to ICMS.""" - - logger.info("Checking for licence data to send to ICMS") - - licence_reply_mail_qs = Mail.objects.filter( - extract_type=ExtractTypeEnum.LICENCE_DATA, status=ReceptionStatusEnum.REPLY_RECEIVED - ) - - mail_to_process = licence_reply_mail_qs.count() - - if mail_to_process == 0: - logger.info("No licence date found to send to ICMS") - return - - if mail_to_process > 1: - raise ValueError("Unable to process mail as there are more than 1 records.") - - mail = licence_reply_mail_qs.select_for_update().first() - - processor = LicenceReplyProcessor.load_from_mail(mail) - - if not processor.file_valid(): - error_msg = f"Unable to process mail (id: {mail.id}, filename: {mail.response_filename}) as it has file errors." - logger.warning(error_msg) - - for file_error in processor.file_errors: - logger.warning( - "File error: position: %s, code: %s, error_msg: %s", - file_error.position, - file_error.code, - file_error.text, - ) - - if not processor.file_trailer_valid(): - logger.warning("File trailer count is different from processor count of accepted and rejected") - - raise ValueError(error_msg) - - licence_reply_data = _get_licence_reply_data(processor) - - url = parse.urljoin(settings.ICMS_API_URL, "chief/license-data-callback") - response: requests.Response = mail_requests.post( - url, - licence_reply_data, - hawk_credentials=settings.LITE_API_ID, - timeout=settings.LITE_API_REQUEST_TIMEOUT, - ) - - try: - response.raise_for_status() - except requests.HTTPError as e: - logger.error("Failed to send licence reply data to ICMS (Check ICMS sentry): %s", str(e)) - - return - - # Update the status if everything was successful - mail.status = ReceptionStatusEnum.REPLY_SENT - mail.save() - logger.info(f"Successfully sent mail (id: {mail.id}, filename: {mail.response_filename}) to ICMS for processing") - - -def _get_licence_reply_data(processor: LicenceReplyProcessor) -> Dict[str, Any]: - # Load all LicencePayload records linked to the current LicenceData record - ld: LicenceData = LicenceData.objects.get(hmrc_run_number=processor.file_header.run_num) - current_licences = ld.licence_payloads.all().values_list("lite_id", "reference", named=True) - - # Create a mapping of transaction reference to the UUID ICMS sent originally - id_map = {lp.reference: str(lp.lite_id) for lp in current_licences} - - licence_reply_data = { - "run_number": processor.file_header.run_num, - "accepted": [{"id": id_map[at.transaction_ref]} for at in processor.accepted_licences], - "rejected": [ - { - "id": id_map[rt.header.transaction_ref], - "errors": [{"error_code": error.code, "error_msg": error.text} for error in rt.errors], - } - for rt in processor.rejected_licences - ], - } - - return licence_reply_data - - -# -# -# def send_usage_data_to_icms(): -# """Checks Mail model for any usage data records to send to ICMS.""" -# raise NotImplementedError - - -def _get_hmrc_mailbox_auth() -> Authenticator: - # TODO: ICMSLST-1759 Replace with ModernAuthentication - - return BasicAuthentication( - user=settings.INCOMING_EMAIL_USER, - password=settings.INCOMING_EMAIL_PASSWORD, - ) - - -def _check_sender_valid(mail: email.message.EmailMessage, *, expected_sender_domain: str, expected_sender_user) -> None: - """Check the sender is valid""" - - # TODO: ICMSLST-1760 Revisit this before going live. - - mail_from_header: UniqueAddressHeader = mail.get("From") - mail_from: Address = mail_from_header.addresses[0] - - if mail_from.domain != expected_sender_domain or mail_from.username != expected_sender_user: - subject = mail.get("Subject") - err_msg = f"Unable to verify incoming email: From:{mail_from}, Subject: {subject}" - - logger.warning(err_msg) - - raise ValueError(err_msg) - - -def _save_licence_reply_email(reply_email: email.message.EmailMessage) -> None: - """Save the incoming licence reply email to the database.""" - - subject = reply_email.get("Subject") - run_number = _get_run_number_from_subject(subject) - - attachments = list(reply_email.iter_attachments()) - - if not len(attachments) == 1: - raise ValueError("Only one attachment is accepted per licence reply email.") - - reply_file = attachments[0] - file_name = reply_file.get_filename() - - # e.g. b"1\\fileHeader\\SPIRE\\CHIEF\\licenceData\\202203171606\\1\\N\r\n..." - payload_bytes = reply_file.get_payload(decode=True) - - ld = LicenceData.objects.get(hmrc_run_number=run_number) - - mail = Mail.objects.select_for_update().get(id=ld.mail.id, status=ReceptionStatusEnum.REPLY_PENDING) - - mail.status = ReceptionStatusEnum.REPLY_RECEIVED - mail.response_filename = file_name - mail.response_data = payload_bytes.decode() - mail.response_date = timezone.now() - mail.response_subject = subject - - mail.save() - - logger.info("Updated mail instance: %s with licence reply (subject: %s - filename: %s", mail.id, subject, file_name) - - -def _get_run_number_from_subject(s: str) -> int: - try: - if "licenceReply" in s: - # e.g. CHIEF_licenceReply_29229_202209201442 -> 29229 - return int(s.split("_")[2]) - - if "usageData" in s: - # e.g. ILBDOTI_live_CHIEF_usageData_7132_202209280300 - return int(s.split("_")[4]) - - except Exception as e: - logger.error(e) - - raise ValueError(f"Unable to parse run number from {s!r}") - - -def _save_usage_data_email(usage_email: email.message.EmailMessage) -> None: - subject = usage_email.get("Subject") - run_number = _get_run_number_from_subject(subject) - logger.debug(f"{subject} - {run_number}") - # Mail extract type when implementing - # Mail.objects.create( - # extract_type=ExtractTypeEnum.USAGE_DATA - # ) - raise NotImplementedError diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index 555a788c..3991344f 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -208,13 +208,12 @@ def _collect_and_send(mail: Mail): def get_email_message_dtos(server: MailServer, number: Optional[int] = 3) -> List[Tuple[EmailMessageDto, Callable]]: - pop3_connection = server.connect_to_pop3() - emails_iter = get_message_iterator(pop3_connection, server.user) - if number: - emails = list(islice(emails_iter, number)) - else: - emails = list(emails_iter) - server.quit_pop3_connection() + with server.connect_to_pop3() as pop3_connection: + emails_iter = get_message_iterator(pop3_connection, server.user) + if number: + emails = list(islice(emails_iter, number)) + else: + emails = list(emails_iter) return emails diff --git a/mail/management/commands/mark_mails_as_read.py b/mail/management/commands/mark_mails_as_read.py index 62a9b767..35962682 100644 --- a/mail/management/commands/mark_mails_as_read.py +++ b/mail/management/commands/mark_mails_as_read.py @@ -26,16 +26,17 @@ def handle(self, *args, **options): pop3_port=995, smtp_port=587, ) - pop3_connection = server.connect_to_pop3() - self.stdout.write(self.style.SUCCESS(f"Connected to {email_user}")) - _, mails, _ = pop3_connection.list() - self.stdout.write(self.style.SUCCESS(f"Found {len(mails)} in the inbox")) + with server.connect_to_pop3() as pop3_connection: + self.stdout.write(self.style.SUCCESS(f"Connected to {email_user}")) - mail_message_ids = [get_message_id(pop3_connection, m.decode(settings.DEFAULT_ENCODING)) for m in mails] - self.stdout.write( - self.style.SUCCESS(f"List of Message-Id and message numbers for existing mails:\n{mail_message_ids}") - ) + _, mails, _ = pop3_connection.list() + self.stdout.write(self.style.SUCCESS(f"Found {len(mails)} in the inbox")) + + mail_message_ids = [get_message_id(pop3_connection, m.decode(settings.DEFAULT_ENCODING)) for m in mails] + self.stdout.write( + self.style.SUCCESS(f"List of Message-Id and message numbers for existing mails:\n{mail_message_ids}") + ) if dry_run.lower() == "false": mailbox_config, _ = models.MailboxConfig.objects.get_or_create(username=email_user) diff --git a/mail/servers.py b/mail/servers.py index 305523b9..59378d77 100644 --- a/mail/servers.py +++ b/mail/servers.py @@ -7,8 +7,10 @@ from mail.auth import Authenticator +logger = logging.getLogger(__name__) -class MailServer(object): + +class MailServer: def __init__( self, auth: Authenticator, @@ -18,7 +20,6 @@ def __init__( self.auth = auth self.pop3_port = pop3_port self.hostname = hostname - self.pop3_connection = None def __eq__(self, other): if not isinstance(other, MailServer): @@ -26,15 +27,16 @@ def __eq__(self, other): return self.hostname == other.hostname and self.auth == other.auth and self.pop3_port == other.pop3_port - def connect_to_pop3(self) -> poplib.POP3_SSL: - logging.info("Establishing a pop3 connection to %s:%s", self.hostname, self.pop3_port) - self.pop3_connection = poplib.POP3_SSL(self.hostname, self.pop3_port, timeout=60) - self.auth.authenticate(self.pop3_connection) - logging.info("pop3 connection established") - return self.pop3_connection - - def quit_pop3_connection(self): - self.pop3_connection.quit() + @contextmanager + def connect_to_pop3(self): + logger.info("Establishing a pop3 connection to %s:%s", self.hostname, self.pop3_port) + pop3_connection = poplib.POP3_SSL(self.hostname, self.pop3_port, timeout=60) + logger.info("Pop3 connection established") + try: + self.auth.authenticate(pop3_connection) + yield pop3_connection + finally: + pop3_connection.quit() @property def user(self): diff --git a/mail/tests/icms/__init__.py b/mail/tests/icms/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mail/tests/icms/test_tasks.py b/mail/tests/icms/test_tasks.py deleted file mode 100644 index 394aa8b6..00000000 --- a/mail/tests/icms/test_tasks.py +++ /dev/null @@ -1,515 +0,0 @@ -import pathlib -import poplib -import uuid -from http import HTTPStatus -from typing import TYPE_CHECKING, List -from unittest import mock -from urllib import parse - -import pytest -from django.conf import settings -from django.test import override_settings - -from mail import servers -from mail.enums import ChiefSystemEnum, ExtractTypeEnum, LicenceActionEnum, ReceptionStatusEnum -from mail.icms import tasks -from mail.models import LicenceData, LicencePayload, Mail, SourceEnum -from mail.utils import pop3 - -if TYPE_CHECKING: - from requests_mock import Mocker - - -@pytest.fixture(autouse=True) -def icms_source(): - """Ensure all tests have the CHIEF_SOURCE_SYSTEM set to ICMS""" - with override_settings(CHIEF_SOURCE_SYSTEM=ChiefSystemEnum.ICMS): - yield - - -@pytest.fixture -def mock_pop3(): - """Mock pop3 to return a known response (CHIEF_licenceReply_29236_202209231140.eml)""" - - mock_pop3 = mock.create_autospec(spec=poplib.POP3_SSL) - - # The return value of calling a magic mock is another instance of the mock with the same spec - # mock = mock.create_autospec(spec=SomeClass) - # mock.return_value == mock() - # Therefore we need to mock the methods on the return_value attribute - # See here: https://docs.python.org/3/library/unittest.mock.html#calling - mock_pop3.return_value.list.return_value = (b"+OK", [b"1 12345"], 1234) - - filename = "CHIEF_licenceReply_29236_202209231140.eml" - mock_pop3.return_value.retr.return_value = (b"+OK", get_licence_reply_msg_list(filename), 1234) - - return mock_pop3 - - -@pytest.fixture -def mock_pop3_multiple_attachments(mock_pop3): - """Override mock_pop3 to return an email with multiple attachments.""" - - filename = "CHIEF_licenceReply_29237_202209241020-two-attachments.eml" - mock_pop3.return_value.retr.return_value = (b"+OK", get_licence_reply_msg_list(filename), 1234) - - return mock_pop3 - - -@pytest.fixture -def mock_pop3_unknown_subject(mock_pop3): - """Override mock_pop3 to return an email with an unknown subject.""" - - filename = "CHIEF_licenceReply_29238_202209251140-unknown-subject.eml" - mock_pop3.return_value.retr.return_value = (b"+OK", get_licence_reply_msg_list(filename), 1234) - - return mock_pop3 - - -@pytest.fixture -def mock_pop3_invalid_reply_subject(mock_pop3): - """Override mock_pop3 to return an email with an invalid subject for a licenceReply email.""" - - filename = "CHIEF_licenceReply_29238_202209251140-invalid-reply-subject.eml" - mock_pop3.return_value.retr.return_value = (b"+OK", get_licence_reply_msg_list(filename), 1234) - - return mock_pop3 - - -@pytest.fixture -def mock_pop3_usage(mock_pop3): - filename = "ILBDOTI_live_CHIEF_usageData_7132_202209280300.eml" - mock_pop3.return_value.retr.return_value = (b"+OK", get_licence_reply_msg_list(filename), 1234) - - return mock_pop3 - - -@pytest.fixture() -def correct_email_settings(): - """Correct test email settings""" - with override_settings(HMRC_TO_DIT_EMAIL_USER="chief", HMRC_TO_DIT_EMAIL_HOSTNAME="hmrc_test_email.com"): - yield None - - -@pytest.fixture -def licence_reply_example() -> str: - filename = "mail/tests/files/icms/CHIEF_licenceReply_accepted_and_rejected_example" - return pathlib.Path(filename).read_text() - - -def test_can_mock_email(mock_pop3): - con = mock_pop3("dummy-host") - - ids = pop3.list_messages_ids(con) - assert ids == ["1"] - - email = pop3.get_email(con, "1") - assert email.get("Subject") == "CHIEF_licenceReply_29236_202209231140" - - attachments = list(email.iter_attachments()) - reply_file = attachments[0] - file_name = reply_file.get_filename() - - assert file_name == "CHIEF_licenceReply_29236_202209231140" - - expected_file = TestProcessLicenceReplyAndUsageEmailTask.get_expected_file() - - actual_file = reply_file.get_payload(decode=True).decode() - - assert expected_file == actual_file - - -class TestProcessLicenceReplyAndUsageEmailTask: - @pytest.fixture(autouse=True) - def _setup(self, db, monkeypatch): - self.con = None - self.monkeypatch = monkeypatch - - # Create a mail object that is waiting for a licence reply from HMRC - self.mail = Mail.objects.create( - status=ReceptionStatusEnum.REPLY_PENDING, - extract_type=ExtractTypeEnum.LICENCE_DATA, - edi_filename="the_licence_data_file", - edi_data="lovely data", - sent_filename="the_licence_data_file", - sent_data="lovely data", - ) - LicenceData.objects.create(licence_ids="", hmrc_run_number=29236, source=SourceEnum.ICMS, mail=self.mail) - - def _patch_pop3_class(self, mock): - # Store con reference to check what was called later - self.con = mock.return_value - - # Patch where the pop3 connection is made - self.monkeypatch.setattr(servers.poplib, "POP3_SSL", mock) - - @staticmethod - def get_expected_file() -> str: - filename = "mail/tests/files/icms/CHIEF_licenceReply_29236_202209231140_attachment" - return pathlib.Path(filename).read_text() - - def test_process_licence_reply_email_success(self, correct_email_settings, mock_pop3): - self._patch_pop3_class(mock_pop3) - - # Check for emails and process them - tasks.process_licence_reply_and_usage_emails() - - # Test the licence mail has been updated with the response email - mail = LicenceData.objects.get(hmrc_run_number=29236).mail - assert mail.status == ReceptionStatusEnum.REPLY_RECEIVED - assert mail.response_filename == "CHIEF_licenceReply_29236_202209231140" - assert mail.response_data == self.get_expected_file() - - # Successful processing should delete the message - self.con.dele.assert_called_with("1") - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - def test_process_licence_reply_errors_with_multiple_attachments( - self, correct_email_settings, mock_pop3_multiple_attachments - ): - self._patch_pop3_class(mock_pop3_multiple_attachments) - - # Only one attachment is accepted per licence reply email. - with pytest.raises(ValueError, match="Only one attachment is accepted per licence reply email."): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - def test_process_licence_reply_errors_with_unknown_email_subject( - self, correct_email_settings, mock_pop3_unknown_subject - ): - self._patch_pop3_class(mock_pop3_unknown_subject) - - with pytest.raises(ValueError, match="Unable to process email with subject: All about cakes"): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - def test_process_licence_reply_errors_with_invalid_reply_subject( - self, correct_email_settings, mock_pop3_invalid_reply_subject - ): - self._patch_pop3_class(mock_pop3_invalid_reply_subject) - - with pytest.raises( - ValueError, match="Unable to parse run number from 'CHIEF_licenceReply_INVALID_29236_202209231140'" - ): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - def test_process_usage_email_errors_until_implemented(self, correct_email_settings, mock_pop3_usage): - self._patch_pop3_class(mock_pop3_usage) - - with pytest.raises(NotImplementedError): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - def test_process_email_usage_fake_success(self, correct_email_settings, mock_pop3_usage): - self._patch_pop3_class(mock_pop3_usage) - mock_save_usage = mock.create_autospec(spec=tasks._save_usage_data_email) - self.monkeypatch.setattr(tasks, "_save_usage_data_email", mock_save_usage) - - tasks.process_licence_reply_and_usage_emails() - - # Successful processing should delete the message - self.con.dele.assert_called_with("1") - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - def test_rollback_when_we_get_an_error_in_one_of_the_files(self, correct_email_settings, transactional_db): - """Test a scenario where there is a good licence file and a bad usage file. - - Everything should roll back if we can't process everything. - """ - - mock_pop3_cls = mock.create_autospec(spec=poplib.POP3_SSL) - # Return multiple message id's (licenceReply and Usage) - mock_pop3_cls.return_value.list.return_value = (b"+OK", [b"1 12345", b"2 54321"], 1234) - - def mock_retr(msg_id): - if msg_id == "1": - return b"+OK", get_licence_reply_msg_list("CHIEF_licenceReply_29236_202209231140.eml"), 12345 - - if msg_id == "2": - return b"+OK", get_licence_reply_msg_list("ILBDOTI_live_CHIEF_usageData_7132_202209280300.eml"), 54321 - - mock_pop3_cls.return_value.retr = mock_retr - self._patch_pop3_class(mock_pop3_cls) - - # Let's make the _save_usage_data_email fail with a custom error - mock__save_usage_data_email = mock.create_autospec( - spec=tasks._save_usage_data_email, side_effect=RuntimeError("Something unexpected has happened...") - ) - - self.monkeypatch.setattr(tasks, "_save_usage_data_email", mock__save_usage_data_email) - - with pytest.raises(RuntimeError, match="Something unexpected has happened..."): - tasks.process_licence_reply_and_usage_emails() - - # The licenceReply mail should have processed correctly and therefore the mail would be marked for deletion - self.con.dele.assert_called_once() - self.con.dele.assert_called_with("1") - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - # Check the Mail model changes have been rolled back (as the task errored) - self.mail.refresh_from_db() - assert self.mail.status == ReceptionStatusEnum.REPLY_PENDING - assert not self.mail.response_filename - assert not self.mail.response_data - - @override_settings(HMRC_TO_DIT_EMAIL_USER="chief", HMRC_TO_DIT_EMAIL_HOSTNAME="fake-valid-domain.com") - def test_unknown_email_sender_domain(self, mock_pop3): - self._patch_pop3_class(mock_pop3) - - with pytest.raises( - ValueError, - match=( - 'Unable to verify incoming email: From:"HMRC, CHIEF" , ' - "Subject: CHIEF_licenceReply_29236_202209231140" - ), - ): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - @override_settings(HMRC_TO_DIT_EMAIL_USER="fake-valid-user", HMRC_TO_DIT_EMAIL_HOSTNAME="hmrc_test_email.com") - def test_unknown_email_sender_user(self, mock_pop3): - self._patch_pop3_class(mock_pop3) - - with pytest.raises( - ValueError, - match=( - 'Unable to verify incoming email: From:"HMRC, CHIEF" , ' - "Subject: CHIEF_licenceReply_29236_202209231140" - ), - ): - tasks.process_licence_reply_and_usage_emails() - - # Check the connection was closed automatically - self.con.quit.assert_called_once() - - # Check any scheduled deletes were reset - self.con.rset.assert_called_once() - - -# Note: Only added this test to cover a "partial" coverage line. -def test_get_run_number_from_unknown_subject(): - with pytest.raises(ValueError, match="Unable to parse run number from 'unknown_subject'"): - tasks._get_run_number_from_subject("unknown_subject") - - -def get_licence_reply_msg_list(filename: str) -> List[bytes]: - return pathlib.Path(f"mail/tests/files/icms/{filename}").read_bytes().split(b"\r\n") - - -@mock.patch("mail.requests.hawk_authentication_enabled", lambda: True) -class TestSendLicenceDataToICMSTask: - @pytest.fixture(autouse=True) - def _setup(self, transactional_db, requests_mock: "Mocker", licence_reply_example): - self.rq = requests_mock - - # Create a mail object that has data to send to ICMS - self.mail = Mail.objects.create( - status=ReceptionStatusEnum.REPLY_RECEIVED, - extract_type=ExtractTypeEnum.LICENCE_DATA, - edi_filename="the_licence_data_file", - edi_data="lovely data", - sent_filename="the_licence_data_file", - sent_data="lovely data", - response_filename="CHIEF_licenceReply_29236_202209231140", - response_data=licence_reply_example, - ) - ld = LicenceData.objects.create(licence_ids="", hmrc_run_number=29236, source=SourceEnum.ICMS, mail=self.mail) - - # fake some licence payload references for the test file - for reference in ["ABC12345", "ABC12346", "ABC12348", "ABC12347"]: - payload = LicencePayload.objects.create( - lite_id=uuid.uuid4(), reference=reference, action=LicenceActionEnum.INSERT, is_processed=True - ) - ld.licence_payloads.add(payload) - - # Store the ICMS UUID that was sent from ICMS. - self.id_1 = str(LicencePayload.objects.get(reference="ABC12345").lite_id) - self.id_2 = str(LicencePayload.objects.get(reference="ABC12346").lite_id) - self.id_3 = str(LicencePayload.objects.get(reference="ABC12348").lite_id) - self.id_4 = str(LicencePayload.objects.get(reference="ABC12347").lite_id) - - def test_send_licence_data_to_icms_success(self, caplog): - # Mock the response that ICMS sends back - url = parse.urljoin(settings.ICMS_API_URL, "chief/license-data-callback") - self.rq.post(url, status_code=HTTPStatus.OK, json={}) - - with mock.patch("mail.requests.verify_api_response", spec=True) as verify_resp: - # Send the licence data to ICMS - tasks.send_licence_data_to_icms() - - verify_resp.assert_called_once() - - self.mail.refresh_from_db() - - # Check the hawk headers is there - assert "hawk-authentication" in self.rq.last_request.headers - - # Check the mail has been updated - assert self.mail.status == ReceptionStatusEnum.REPLY_SENT - - # Check what data we sent to ICMS - assert self.rq.last_request.json() == { - "accepted": [{"id": self.id_1}, {"id": self.id_2}, {"id": self.id_3}], - "rejected": [ - { - "id": self.id_4, - "errors": [ - {"error_code": "1234", "error_msg": "Invalid thingy"}, - {"error_code": "76543", "error_msg": "Invalid commodity “1234A6” in line " "23"}, - ], - } - ], - "run_number": "29236", - } - - last_log_msg = caplog.messages[-1] - assert last_log_msg == ( - f"Successfully sent mail (id: {self.mail.id}, filename: {self.mail.response_filename})" - f" to ICMS for processing" - ) - - def test_send_licence_data_to_icms_http_error(self, caplog): - # Mock the response that ICMS sends back - an internal server error - url = parse.urljoin(settings.ICMS_API_URL, "chief/license-data-callback") - self.rq.post(url, status_code=HTTPStatus.INTERNAL_SERVER_ERROR, json={}, reason="test reason") - - # Send the licence data to ICMS using the data we know should pass - with mock.patch("mail.requests.verify_api_response", spec=True) as verify_resp: - # Send the licence data to ICMS - tasks.send_licence_data_to_icms() - - verify_resp.assert_called_once() - - last_log_msg = caplog.messages[-1] - assert last_log_msg == ( - "Failed to send licence reply data to ICMS (Check ICMS sentry):" - " 500 Server Error: test reason for url: http://web:8080/chief/license-data-callback" - ) - - # Check the mail status hasn't changed - self.mail.refresh_from_db() - - assert self.mail.status == ReceptionStatusEnum.REPLY_RECEIVED - - -def test_no_mail_found(db, caplog): - tasks.send_licence_data_to_icms() - - assert caplog.messages == ["Checking for licence data to send to ICMS", "No licence date found to send to ICMS"] - - -def test_multiple_mail_raises_error(db, licence_reply_example): - Mail.objects.create( - status=ReceptionStatusEnum.REPLY_RECEIVED, - extract_type=ExtractTypeEnum.LICENCE_DATA, - edi_filename="the_licence_data_file", - edi_data="lovely data", - sent_filename="the_licence_data_file", - sent_data="lovely data", - response_filename="CHIEF_licenceReply_29236_202209231140", - response_data=licence_reply_example, - ) - - Mail.objects.create( - status=ReceptionStatusEnum.REPLY_RECEIVED, - extract_type=ExtractTypeEnum.LICENCE_DATA, - edi_filename="the_licence_data_file", - edi_data="lovely data", - sent_filename="the_licence_data_file", - sent_data="lovely data", - response_filename="CHIEF_licenceReply_29237_202209231140", - response_data=licence_reply_example, - ) - - with pytest.raises(ValueError, match="Unable to process mail as there are more than 1 records."): - tasks.send_licence_data_to_icms() - - -def test_file_with_errors_raises_errors(db, caplog): - mail = Mail.objects.create( - status=ReceptionStatusEnum.REPLY_RECEIVED, - extract_type=ExtractTypeEnum.LICENCE_DATA, - edi_filename="the_licence_data_file", - edi_data="lovely data", - sent_filename="the_licence_data_file", - sent_data="lovely data", - response_filename="CHIEF_licenceReply_29237_202209231140", - ) - - file_with_file_error = ( - "1\\fileHeader\\CHIEF\\ILBDOTI\\licenceReply\\202209231140\\29236\n" - "2\\fileError\\18\\Record type 'fileHeader' not recognised\\99\n" - "3\\accepted\\ABC12348\n" - "4\\fileTrailer\\1\\0\\1\n" - ) - - mail.response_data = file_with_file_error - mail.save() - - with pytest.raises( - ValueError, - match=rf"Unable to process mail \(id: {mail.id}, filename: {mail.response_filename}\) as it has file errors.", - ): - tasks.send_licence_data_to_icms() - - assert caplog.messages == [ - "Checking for licence data to send to ICMS", - f"Unable to process mail (id: {mail.id}, filename: {mail.response_filename}) as it has file errors.", - "File error: position: 99, code: 18, error_msg: Record type 'fileHeader' not recognised", - ] - - file_with_file_error_and_file_trailer_errors = ( - "1\\fileHeader\\CHIEF\\ILBDOTI\\licenceReply\\202209231140\\29236\n" - "2\\fileError\\18\\Record type 'fileHeader' not recognised\\99\n" - "3\\accepted\\ABC12348\n" - "4\\fileTrailer\\0\\1\\1\n" - ) - mail.response_data = file_with_file_error_and_file_trailer_errors - mail.save() - with pytest.raises( - ValueError, - match=rf"Unable to process mail \(id: {mail.id}, filename: {mail.response_filename}\) as it has file errors.", - ): - tasks.send_licence_data_to_icms() - - assert caplog.messages == [ - "Checking for licence data to send to ICMS", - f"Unable to process mail (id: {mail.id}, filename: {mail.response_filename}) as it has file errors.", - "File error: position: 99, code: 18, error_msg: Record type 'fileHeader' not recognised", - "File trailer count is different from processor count of accepted and rejected", - ] diff --git a/mail/tests/test_dtos.py b/mail/tests/test_dtos.py index 460fd871..66f778f1 100644 --- a/mail/tests/test_dtos.py +++ b/mail/tests/test_dtos.py @@ -1,3 +1,4 @@ +from contextlib import contextmanager from unittest.mock import Mock, patch from dateutil.parser import parse @@ -90,15 +91,20 @@ class TestGetEmailMessagesDTOs(SimpleTestCase): @patch("mail.libraries.routing_controller.get_message_iterator") def test_get_email_message_dtos(self, mock_get_message_iterator): mock_mail_server = Mock(spec=MailServer) + mock_connection = Mock() + + @contextmanager + def _mock_connect_to_pop3(): + yield mock_connection + + mock_mail_server.connect_to_pop3.side_effect = _mock_connect_to_pop3 mock_emails = [Mock(), Mock()] mock_get_message_iterator.return_value = mock_emails emails = get_email_message_dtos(mock_mail_server) self.assertEqual(emails, mock_emails) - mock_mail_server.connect_to_pop3.assert_called_once_with() mock_get_message_iterator.assert_called_once_with( - mock_mail_server.connect_to_pop3(), + mock_connection, mock_mail_server.user, ) - mock_mail_server.quit_pop3_connection.assert_called_once_with() diff --git a/mail/tests/test_servers.py b/mail/tests/test_servers.py index f4427542..f17ff02d 100644 --- a/mail/tests/test_servers.py +++ b/mail/tests/test_servers.py @@ -31,52 +31,119 @@ def test_mail_server_not_equal(self): self.assertNotEqual(m1, m2) - def test_mail_server_connect_to_pop3(self): + @patch("mail.servers.poplib") + def test_mail_server_connect_to_pop3(self, mock_poplib): hostname = "host" pop3_port = 1 auth = Mock(spec=Authenticator) pop3conn = MagicMock(spec=POP3_SSL) + mock_poplib.POP3_SSL = pop3conn - with patch("mail.servers.poplib") as mock_poplib: - mock_poplib.POP3_SSL = pop3conn - - mail_server = MailServer( - auth, - hostname=hostname, - pop3_port=pop3_port, - ) - mail_server.connect_to_pop3() + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + with mail_server.connect_to_pop3() as pop3connection: + pop3connection.list() pop3conn.assert_called_with( hostname, pop3_port, timeout=60, ) + mock_connection = pop3conn() + self.assertEqual(pop3connection, mock_connection) + auth.authenticate.assert_called_with(pop3connection) + pop3connection.quit.assert_called_once() + pop3connection.list.assert_called_once() + + @patch("mail.servers.poplib") + def test_mail_server_connection_exception(self, mock_poplib): + hostname = "host" + pop3_port = 1 + auth = Mock(spec=Authenticator) + pop3conn = MagicMock(spec=POP3_SSL) mock_connection = pop3conn() - auth.authenticate.assert_called_with(mock_connection) + pop3conn.side_effect = Exception() + mock_poplib.POP3_SSL = pop3conn - def test_mail_server_quit_pop3_connection(self): + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + with self.assertRaises(Exception): + with mail_server.connect_to_pop3() as pop3connection: + pop3connection.list() + + pop3conn.assert_called_with( + hostname, + pop3_port, + timeout=60, + ) + auth.authenticate.assert_not_called() + mock_connection.quit.assert_not_called() + mock_connection.list.assert_not_called() + + @patch("mail.servers.poplib") + def test_mail_server_connection_context_exception(self, mock_poplib): hostname = "host" pop3_port = 1 auth = Mock(spec=Authenticator) pop3conn = MagicMock(spec=POP3_SSL) + mock_connection = pop3conn() + mock_poplib.POP3_SSL = pop3conn + + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + with self.assertRaises(Exception): + with mail_server.connect_to_pop3() as pop3connection: + raise Exception() - with patch("mail.servers.poplib") as mock_poplib: - mock_poplib.POP3_SSL = pop3conn + pop3conn.assert_called_with( + hostname, + pop3_port, + timeout=60, + ) + self.assertEqual(pop3connection, mock_connection) + auth.authenticate.assert_called_with(pop3connection) + pop3connection.quit.assert_called_once() - mail_server = MailServer( - auth, - hostname=hostname, - pop3_port=pop3_port, - ) - mail_server.connect_to_pop3() - mail_server.quit_pop3_connection() + @patch("mail.servers.poplib") + def test_mail_server_connection_authentication_exception(self, mock_poplib): + hostname = "host" + pop3_port = 1 + auth = Mock(spec=Authenticator) + auth.authenticate.side_effect = Exception() + pop3conn = MagicMock(spec=POP3_SSL) mock_connection = pop3conn() + mock_poplib.POP3_SSL = pop3conn + + mail_server = MailServer( + auth, + hostname=hostname, + pop3_port=pop3_port, + ) + with self.assertRaises(Exception): + with mail_server.connect_to_pop3() as pop3connection: + pop3connection.list() + + pop3conn.assert_called_with( + hostname, + pop3_port, + timeout=60, + ) + auth.authenticate.assert_called_with(mock_connection) mock_connection.quit.assert_called_once() + mock_connection.list.assert_not_called() def test_mail_server_user(self): auth = Mock(spec=Authenticator) diff --git a/mail/utils/pop3.py b/mail/utils/pop3.py index 4aa47114..0a727e3f 100644 --- a/mail/utils/pop3.py +++ b/mail/utils/pop3.py @@ -1,22 +1,7 @@ -import contextlib import email import poplib from typing import List -from mail.auth import Authenticator -from mail.servers import MailServer - - -@contextlib.contextmanager -def get_connection(auth: Authenticator, hostname: str, port: int) -> poplib.POP3: - ms = MailServer(auth, hostname=hostname, pop3_port=port) - - try: - yield ms.connect_to_pop3() - - finally: - ms.quit_pop3_connection() - def list_messages_ids(con: poplib.POP3) -> List[str]: resp, messages, octets = con.list() diff --git a/mock_hmrc/handler.py b/mock_hmrc/handler.py index 0aaca919..46aa424b 100644 --- a/mock_hmrc/handler.py +++ b/mock_hmrc/handler.py @@ -10,26 +10,24 @@ def get_hmrc_email_message_dto(server): - conn = server.connect_to_pop3() - _, mails, _ = conn.list() - message_ids = [get_message_id(line.decode(settings.DEFAULT_ENCODING)) for line in mails] - - if models.RetrievedMail.objects.count(): - recent_mail = models.RetrievedMail.objects.all().order_by("message_id").last() - target_id = None - for msg_id in message_ids: - if msg_id > recent_mail.message_id: - target_id = msg_id - break - else: - target_id = message_ids[0] - - dto = None - if target_id: - email = conn.retr(target_id) - dto = to_hmrc_mail_message_dto(target_id, email) - - server.quit_pop3_connection() + with server.connect_to_pop3() as conn: + _, mails, _ = conn.list() + message_ids = [get_message_id(line.decode(settings.DEFAULT_ENCODING)) for line in mails] + + if models.RetrievedMail.objects.count(): + recent_mail = models.RetrievedMail.objects.all().order_by("message_id").last() + target_id = None + for msg_id in message_ids: + if msg_id > recent_mail.message_id: + target_id = msg_id + break + else: + target_id = message_ids[0] + + dto = None + if target_id: + email = conn.retr(target_id) + dto = to_hmrc_mail_message_dto(target_id, email) return dto From 5b35c4cd31d5b5e2f49039608ade2b6e59d9eb01 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sat, 21 Dec 2024 16:15:28 +0000 Subject: [PATCH 04/38] Remove default settings for mail server initialisation --- mail/celery_tasks.py | 2 +- mail/libraries/routing_controller.py | 4 +- .../management/commands/mark_mails_as_read.py | 2 +- .../libraries/test_routing_controller.py | 2 +- mail/tests/test_celery_tasks.py | 50 +++--- mail/tests/test_dtos.py | 2 +- mail/tests/test_end_to_end.py | 4 +- mail_servers/__init__.py | 0 {mail => mail_servers}/auth.py | 4 +- {mail => mail_servers}/servers.py | 6 +- mail_servers/tests/__init__.py | 0 {mail => mail_servers}/tests/test_auth.py | 154 +++++++++--------- {mail => mail_servers}/tests/test_servers.py | 14 +- 13 files changed, 123 insertions(+), 121 deletions(-) create mode 100644 mail_servers/__init__.py rename {mail => mail_servers}/auth.py (95%) rename {mail => mail_servers}/servers.py (92%) create mode 100644 mail_servers/tests/__init__.py rename {mail => mail_servers}/tests/test_auth.py (54%) rename {mail => mail_servers}/tests/test_servers.py (95%) diff --git a/mail/celery_tasks.py b/mail/celery_tasks.py index 31d69cb6..bc654ecf 100644 --- a/mail/celery_tasks.py +++ b/mail/celery_tasks.py @@ -19,7 +19,7 @@ from mail.libraries.routing_controller import check_and_route_emails, update_mail from mail.libraries.usage_data_decomposition import build_json_payload_from_data_blocks, split_edi_data_by_id from mail.models import LicenceIdMapping, LicencePayload, Mail, UsageData -from mail.servers import smtp_send +from mail_servers.servers import smtp_send logger = get_task_logger(__name__) diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index 3991344f..75e7a89b 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -6,7 +6,6 @@ from django.utils import timezone from rest_framework.exceptions import ValidationError -from mail.auth import BasicAuthentication, ModernAuthentication from mail.enums import ExtractTypeEnum, MailReadStatuses, ReceptionStatusEnum, ReplyStatusEnum, SourceEnum from mail.libraries.builders import build_email_message from mail.libraries.data_processors import ( @@ -23,7 +22,8 @@ ) from mail.libraries.mailbox_service import get_message_iterator from mail.models import Mail -from mail.servers import MailServer, smtp_send +from mail_servers.auth import BasicAuthentication, ModernAuthentication +from mail_servers.servers import MailServer, smtp_send logger = logging.getLogger(__name__) diff --git a/mail/management/commands/mark_mails_as_read.py b/mail/management/commands/mark_mails_as_read.py index 35962682..82dd1ad5 100644 --- a/mail/management/commands/mark_mails_as_read.py +++ b/mail/management/commands/mark_mails_as_read.py @@ -3,7 +3,7 @@ from mail import enums, models from mail.libraries.mailbox_service import get_message_id -from mail.servers import MailServer +from mail_servers.servers import MailServer class Command(BaseCommand): diff --git a/mail/tests/libraries/test_routing_controller.py b/mail/tests/libraries/test_routing_controller.py index fee776e6..8bd3563c 100644 --- a/mail/tests/libraries/test_routing_controller.py +++ b/mail/tests/libraries/test_routing_controller.py @@ -3,12 +3,12 @@ from django.test import override_settings -from mail.auth import BasicAuthentication, ModernAuthentication from mail.libraries.routing_controller import ( get_hmrc_to_dit_mailserver, get_mock_hmrc_mailserver, get_spire_to_dit_mailserver, ) +from mail_servers.auth import BasicAuthentication, ModernAuthentication class RoutingControllerTest(unittest.TestCase): diff --git a/mail/tests/test_celery_tasks.py b/mail/tests/test_celery_tasks.py index 0a8bc8cf..1bfce43b 100644 --- a/mail/tests/test_celery_tasks.py +++ b/mail/tests/test_celery_tasks.py @@ -133,13 +133,13 @@ def test_sending_of_new_message_from_spire_success( True, [ { - "sender": "lite-hmrc@gov.uk", - "recipients": "spire@example.com", + "sender": "lite-hmrc@gov.uk", # /PS-IGNORE + "recipients": "spire@example.com", # /PS-IGNORE "subject": "ILBDOTI_live_CHIEF_licenceReply_78120_202403060300", }, { - "sender": "lite-hmrc@gov.uk", - "recipients": "ecju@gov.uk", + "sender": "lite-hmrc@gov.uk", # /PS-IGNORE + "recipients": "ecju@gov.uk", # /PS-IGNORE "subject": "Licence rejected by HMRC", }, ], @@ -148,8 +148,8 @@ def test_sending_of_new_message_from_spire_success( False, [ { - "sender": "lite-hmrc@gov.uk", - "recipients": "spire@example.com", + "sender": "lite-hmrc@gov.uk", # /PS-IGNORE + "recipients": "spire@example.com", # /PS-IGNORE "subject": "ILBDOTI_live_CHIEF_licenceReply_78120_202403060300", } ], @@ -157,9 +157,9 @@ def test_sending_of_new_message_from_spire_success( ] ) @override_settings( - EMAIL_USER="lite-hmrc@gov.uk", - NOTIFY_USERS=["ecju@gov.uk"], - SPIRE_ADDRESS="spire@example.com", + EMAIL_USER="lite-hmrc@gov.uk", # /PS-IGNORE + NOTIFY_USERS=["ecju@gov.uk"], # /PS-IGNORE + SPIRE_ADDRESS="spire@example.com", # /PS-IGNORE ) @mock.patch("mail.libraries.routing_controller.get_spire_to_dit_mailserver") @mock.patch("mail.libraries.routing_controller.get_hmrc_to_dit_mailserver") @@ -206,8 +206,8 @@ def test_processing_of_licence_reply_with_rejected_licences( email_message_dto = EmailMessageDto( run_number=f"{run_number}", - sender="test@example.com", - receiver="receiver@example.com", + sender="test@example.com", # /PS-IGNORE + receiver="receiver@example.com", # /PS-IGNORE date="Mon, 17 May 2021 14:20:18 +0100", body="licence rejected", subject=licence_reply_filename, @@ -240,12 +240,12 @@ def inject_fixtures(self, caplog): self.caplog = caplog @mock.patch("mail.celery_tasks.cache") - @mock.patch("mail.servers.get_smtp_connection") + @mock.patch("mail_servers.servers.get_smtp_connection") def test_sends_email(self, mock_get_smtp_connection, mock_cache): mock_conn = mock_get_smtp_connection().__enter__() message = { - "From": "from@example.com", - "To": "to@example.com", + "From": "from@example.com", # /PS-IGNORE + "To": "to@example.com", # /PS-IGNORE } send_email_task.apply(args=[message]) mock_conn.send_message.assert_called_with(message) @@ -258,12 +258,12 @@ def test_sends_email(self, mock_get_smtp_connection, mock_cache): ] ) @mock.patch("mail.celery_tasks.cache") - @mock.patch("mail.servers.get_smtp_connection") + @mock.patch("mail_servers.servers.get_smtp_connection") def test_sends_email_failed_then_succeeds(self, exception_class, mock_get_smtp_connection, mock_cache): mock_conn = mock_get_smtp_connection().__enter__() message = { - "From": "from@example.com", - "To": "to@example.com", + "From": "from@example.com", # /PS-IGNORE + "To": "to@example.com", # /PS-IGNORE } mock_conn.send_message.side_effect = [exception_class(), None] send_email_task.apply(args=[message]) @@ -278,12 +278,12 @@ def test_sends_email_failed_then_succeeds(self, exception_class, mock_get_smtp_c ] ) @mock.patch("mail.celery_tasks.cache") - @mock.patch("mail.servers.get_smtp_connection") + @mock.patch("mail_servers.servers.get_smtp_connection") def test_sends_email_max_retry_failures(self, exception_class, mock_get_smtp_connection, mock_cache): mock_conn = mock_get_smtp_connection().__enter__() message = { - "From": "from@example.com", - "To": "to@example.com", + "From": "from@example.com", # /PS-IGNORE + "To": "to@example.com", # /PS-IGNORE } mock_conn.send_message.side_effect = exception_class() send_email_task.apply(args=[message]) @@ -294,7 +294,7 @@ def test_sends_email_max_retry_failures(self, exception_class, mock_get_smtp_con ) mock_cache.lock.assert_called_with("global_send_email_lock", timeout=600) - @mock.patch("mail.servers.get_smtp_connection") + @mock.patch("mail_servers.servers.get_smtp_connection") def test_locking(self, mock_get_smtp_connection): results = [] @@ -318,14 +318,14 @@ def _sleepy(message): with concurrent.futures.ThreadPoolExecutor() as executor: message_1 = { - "From": "from1@example.com", - "To": "to1@example.com", + "From": "from1@example.com", # /PS-IGNORE + "To": "to1@example.com", # /PS-IGNORE } future_1 = executor.submit(send_email_task.apply, args=[message_1]) message_2 = { - "From": "from2@example.com", - "To": "to2@example.com", + "From": "from2@example.com", # /PS-IGNORE + "To": "to2@example.com", # /PS-IGNORE } future_2 = executor.submit(send_email_task.apply, args=[message_2]) diff --git a/mail/tests/test_dtos.py b/mail/tests/test_dtos.py index 66f778f1..4f9bc3bb 100644 --- a/mail/tests/test_dtos.py +++ b/mail/tests/test_dtos.py @@ -8,8 +8,8 @@ from mail.libraries.email_message_dto import * from mail.libraries.helpers import read_file, sort_dtos_by_date, to_mail_message_dto from mail.libraries.routing_controller import get_email_message_dtos -from mail.servers import MailServer from mail.tests.libraries.client import LiteHMRCTestClient +from mail_servers.servers import MailServer class TestDtos(LiteHMRCTestClient): diff --git a/mail/tests/test_end_to_end.py b/mail/tests/test_end_to_end.py index 108ec98f..f73ec05b 100644 --- a/mail/tests/test_end_to_end.py +++ b/mail/tests/test_end_to_end.py @@ -10,12 +10,12 @@ from pytest_django.asserts import assertQuerySetEqual from rest_framework import status -from mail.auth import BasicAuthentication from mail.celery_tasks import manage_inbox, send_licence_details_to_hmrc from mail.enums import ExtractTypeEnum, ReceptionStatusEnum, SourceEnum from mail.libraries.helpers import read_file from mail.models import LicenceData, LicencePayload, Mail, UsageData -from mail.servers import MailServer +from mail_servers.auth import BasicAuthentication +from mail_servers.servers import MailServer pytestmark = pytest.mark.django_db diff --git a/mail_servers/__init__.py b/mail_servers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mail/auth.py b/mail_servers/auth.py similarity index 95% rename from mail/auth.py rename to mail_servers/auth.py index 89d6ffbb..d2563c8e 100644 --- a/mail/auth.py +++ b/mail_servers/auth.py @@ -77,7 +77,9 @@ def _get_access_token(self): return result["access_token"] def _encode_access_string(self, username, access_token): - return base64.b64encode(f"user={username}\x01auth=Bearer {access_token}\x01\x01".encode()).decode() + return base64.b64encode( # /PS-IGNORE + f"user={username}\x01auth=Bearer {access_token}\x01\x01".encode() # /PS-IGNORE + ).decode() def authenticate(self, connection: poplib.POP3_SSL): logger.info("Authenticating using OAuth authentication: %s", self.user) diff --git a/mail/servers.py b/mail_servers/servers.py similarity index 92% rename from mail/servers.py rename to mail_servers/servers.py index 59378d77..eef04833 100644 --- a/mail/servers.py +++ b/mail_servers/servers.py @@ -5,7 +5,7 @@ from django.conf import settings -from mail.auth import Authenticator +from mail_servers.auth import Authenticator logger = logging.getLogger(__name__) @@ -14,8 +14,8 @@ class MailServer: def __init__( self, auth: Authenticator, - hostname: str = settings.EMAIL_HOSTNAME, - pop3_port: int = settings.EMAIL_POP3_PORT, + hostname: str, + pop3_port: int, ): self.auth = auth self.pop3_port = pop3_port diff --git a/mail_servers/tests/__init__.py b/mail_servers/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mail/tests/test_auth.py b/mail_servers/tests/test_auth.py similarity index 54% rename from mail/tests/test_auth.py rename to mail_servers/tests/test_auth.py index 77bd6c2e..e66f78aa 100644 --- a/mail/tests/test_auth.py +++ b/mail_servers/tests/test_auth.py @@ -5,7 +5,7 @@ from django.test import SimpleTestCase -from mail.auth import AuthenticationError, BasicAuthentication, ModernAuthentication +from mail_servers.auth import AuthenticationError, BasicAuthentication, ModernAuthentication class BasicAuthenticationTests(SimpleTestCase): @@ -31,8 +31,9 @@ def test_not_equal(self): self.assertNotEqual(auth, equal_auth) +@patch("mail_servers.auth.msal") class ModernAuthenticationTests(SimpleTestCase): - def test_authenticates_connection_with_silent_acquisition(self): + def test_authenticates_connection_with_silent_acquisition(self, mock_msal): pop3conn = MagicMock(spec=POP3_SSL) mock_conn = pop3conn() @@ -45,18 +46,17 @@ def test_authenticates_connection_with_silent_acquisition(self): "access_token": "access_token", } - with patch("mail.auth.msal") as mock_msal: - mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() - mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent - mock_acquire_token_silent.return_value = mock_access_token - - auth = ModernAuthentication( - username, - client_id, - client_secret, - tenant_id, - ) - auth.authenticate(mock_conn) + mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() + mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent + mock_acquire_token_silent.return_value = mock_access_token + + auth = ModernAuthentication( + username, + client_id, + client_secret, + tenant_id, + ) + auth.authenticate(mock_conn) mock_msal.ConfidentialClientApplication.assert_called_with( client_id, @@ -69,7 +69,9 @@ def test_authenticates_connection_with_silent_acquisition(self): account=None, ) - access_string = base64.b64encode("user=username\x01auth=Bearer access_token\x01\x01".encode()).decode() + access_string = base64.b64encode( # /PS-IGNORE + "user=username\x01auth=Bearer access_token\x01\x01".encode() # /PS-IGNORE + ).decode() mock_conn._shortcmd.assert_has_calls( [ call("AUTH XOAUTH2"), @@ -77,7 +79,7 @@ def test_authenticates_connection_with_silent_acquisition(self): ] ) - def test_authenticates_connection_without_silent_acquisition(self): + def test_authenticates_connection_without_silent_acquisition(self, mock_msal): pop3conn = MagicMock(spec=POP3_SSL) mock_conn = pop3conn() @@ -90,22 +92,21 @@ def test_authenticates_connection_without_silent_acquisition(self): "access_token": "access_token", } - with patch("mail.auth.msal") as mock_msal: - mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() + mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() - mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent - mock_acquire_token_silent.return_value = None + mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent + mock_acquire_token_silent.return_value = None - mock_acquire_token_for_client = mock_ConfidentialClientApplication.acquire_token_for_client - mock_acquire_token_for_client.return_value = mock_access_token + mock_acquire_token_for_client = mock_ConfidentialClientApplication.acquire_token_for_client + mock_acquire_token_for_client.return_value = mock_access_token - auth = ModernAuthentication( - username, - client_id, - client_secret, - tenant_id, - ) - auth.authenticate(mock_conn) + auth = ModernAuthentication( + username, + client_id, + client_secret, + tenant_id, + ) + auth.authenticate(mock_conn) mock_msal.ConfidentialClientApplication.assert_called_with( client_id, @@ -121,7 +122,9 @@ def test_authenticates_connection_without_silent_acquisition(self): scopes=["https://outlook.office.com/.default"], ) - access_string = base64.b64encode("user=username\x01auth=Bearer access_token\x01\x01".encode()).decode() + access_string = base64.b64encode( # /PS-IGNORE + "user=username\x01auth=Bearer access_token\x01\x01".encode() # /PS-IGNORE + ).decode() mock_conn._shortcmd.assert_has_calls( [ call("AUTH XOAUTH2"), @@ -129,7 +132,7 @@ def test_authenticates_connection_without_silent_acquisition(self): ] ) - def test_error_acquiring_access_token(self): + def test_error_acquiring_access_token(self, mock_msal): pop3conn = MagicMock(spec=POP3_SSL) mock_conn = pop3conn() @@ -148,60 +151,57 @@ def test_error_acquiring_access_token(self): "error_uri": "https://login.microsoftonline.com/error?code=1234567", } - with patch("mail.auth.msal") as mock_msal: - mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() - mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent - mock_acquire_token_silent.return_value = mock_failed_result - - auth = ModernAuthentication( - username, - client_id, - client_secret, - tenant_id, - ) - with self.assertRaises(AuthenticationError), self.assertLogs(logger="mail.auth", level="INFO") as cm: - auth.authenticate(mock_conn) - - self.assertIn( - f"INFO:mail.auth:{mock_failed_result}", - cm.output, - ) + mock_ConfidentialClientApplication = mock_msal.ConfidentialClientApplication() + mock_acquire_token_silent = mock_ConfidentialClientApplication.acquire_token_silent + mock_acquire_token_silent.return_value = mock_failed_result - def test_equal(self): + auth = ModernAuthentication( + username, + client_id, + client_secret, + tenant_id, + ) + with self.assertRaises(AuthenticationError), self.assertLogs(logger="mail_servers.auth", level="INFO") as cm: + auth.authenticate(mock_conn) + + self.assertIn( + f"INFO:mail_servers.auth:{mock_failed_result}", + cm.output, + ) + + def test_equal(self, mock_msal): username = "username" client_id = "client_id" client_secret = "client_secret" # nosec tenant_id = "tenant_id" - with patch("mail.auth.msal"): - auth = ModernAuthentication( - username, - client_id, - client_secret, - tenant_id, - ) - equal_auth = ModernAuthentication( - username, - client_id, - client_secret, - tenant_id, - ) + auth = ModernAuthentication( + username, + client_id, + client_secret, + tenant_id, + ) + equal_auth = ModernAuthentication( + username, + client_id, + client_secret, + tenant_id, + ) self.assertEqual(auth, equal_auth) - def test_not_equal(self): - with patch("mail.auth.msal"): - auth = ModernAuthentication( - "username", - "client_id", - "client_secret", - "tenant_id", - ) - equal_auth = ModernAuthentication( - "other_username", - "other_client_id", - "other_client_secret", - "other_tenant_id", - ) + def test_not_equal(self, mock_msal): + auth = ModernAuthentication( + "username", + "client_id", + "client_secret", + "tenant_id", + ) + equal_auth = ModernAuthentication( + "other_username", + "other_client_id", + "other_client_secret", + "other_tenant_id", + ) self.assertNotEqual(auth, equal_auth) diff --git a/mail/tests/test_servers.py b/mail_servers/tests/test_servers.py similarity index 95% rename from mail/tests/test_servers.py rename to mail_servers/tests/test_servers.py index f17ff02d..9a9478b1 100644 --- a/mail/tests/test_servers.py +++ b/mail_servers/tests/test_servers.py @@ -3,8 +3,8 @@ from django.test import SimpleTestCase, override_settings -from mail.auth import Authenticator -from mail.servers import MailServer, smtp_send +from mail_servers.auth import Authenticator +from mail_servers.servers import MailServer, smtp_send class MailServerTests(SimpleTestCase): @@ -31,7 +31,7 @@ def test_mail_server_not_equal(self): self.assertNotEqual(m1, m2) - @patch("mail.servers.poplib") + @patch("mail_servers.servers.poplib") def test_mail_server_connect_to_pop3(self, mock_poplib): hostname = "host" pop3_port = 1 @@ -59,7 +59,7 @@ def test_mail_server_connect_to_pop3(self, mock_poplib): pop3connection.quit.assert_called_once() pop3connection.list.assert_called_once() - @patch("mail.servers.poplib") + @patch("mail_servers.servers.poplib") def test_mail_server_connection_exception(self, mock_poplib): hostname = "host" pop3_port = 1 @@ -88,7 +88,7 @@ def test_mail_server_connection_exception(self, mock_poplib): mock_connection.quit.assert_not_called() mock_connection.list.assert_not_called() - @patch("mail.servers.poplib") + @patch("mail_servers.servers.poplib") def test_mail_server_connection_context_exception(self, mock_poplib): hostname = "host" pop3_port = 1 @@ -116,7 +116,7 @@ def test_mail_server_connection_context_exception(self, mock_poplib): auth.authenticate.assert_called_with(pop3connection) pop3connection.quit.assert_called_once() - @patch("mail.servers.poplib") + @patch("mail_servers.servers.poplib") def test_mail_server_connection_authentication_exception(self, mock_poplib): hostname = "host" pop3_port = 1 @@ -162,7 +162,7 @@ def test_mail_server_user(self): EMAIL_USER="test.user", EMAIL_PASSWORD="test_password", ) -@patch("mail.servers.smtplib.SMTP", autospec=True) +@patch("mail_servers.servers.smtplib.SMTP", autospec=True) class SmtpSendTests(SimpleTestCase): def test_smtp_send(self, mock_SMTP): mock_result = MagicMock() From 5431d0a56a1f18fb1629a278ffe03a24a5c4321f Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sat, 21 Dec 2024 20:40:28 +0000 Subject: [PATCH 05/38] Allow configuration of mailservers to be defined in config --- conf/settings.py | 42 +++++++++- mail/libraries/routing_controller.py | 41 ++-------- .../libraries/test_routing_controller.py | 79 ++++++++++++------- mail_servers/tests/test_utils.py | 30 +++++++ mail_servers/utils.py | 18 +++++ 5 files changed, 144 insertions(+), 66 deletions(-) create mode 100644 mail_servers/tests/test_utils.py create mode 100644 mail_servers/utils.py diff --git a/conf/settings.py b/conf/settings.py index 85fa1ca0..dda0d83d 100644 --- a/conf/settings.py +++ b/conf/settings.py @@ -138,6 +138,44 @@ SPIRE_ADDRESS = env("SPIRE_ADDRESS", default="test-spire-address@example.com") # /PS-IGNORE HMRC_ADDRESS = env("HMRC_ADDRESS", default="test-hmrc-address@example.com") # /PS-IGNORE +AZURE_AUTH_CLIENT_ID = env.str("AZURE_AUTH_CLIENT_ID") +AZURE_AUTH_CLIENT_SECRET = env.str("AZURE_AUTH_CLIENT_SECRET") +AZURE_AUTH_TENANT_ID = env.str("AZURE_AUTH_TENANT_ID") + +MAIL_SERVERS = { + "spire_to_dit": { + "HOSTNAME": INCOMING_EMAIL_HOSTNAME, + "POP3_PORT": INCOMING_EMAIL_POP3_PORT, + "AUTHENTICATION_CLASS": "mail_servers.auth.ModernAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": INCOMING_EMAIL_USER, + "client_id": AZURE_AUTH_CLIENT_ID, + "client_secret": AZURE_AUTH_CLIENT_SECRET, + "tenant_id": AZURE_AUTH_TENANT_ID, + }, + }, + "hmrc_to_dit": { + "HOSTNAME": HMRC_TO_DIT_EMAIL_HOSTNAME, + "POP3_PORT": HMRC_TO_DIT_EMAIL_POP3_PORT, + "AUTHENTICATION_CLASS": "mail_servers.auth.ModernAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": HMRC_TO_DIT_EMAIL_USER, + "client_id": AZURE_AUTH_CLIENT_ID, + "client_secret": AZURE_AUTH_CLIENT_SECRET, + "tenant_id": AZURE_AUTH_TENANT_ID, + }, + }, + "mock_hmrc": { + "HOSTNAME": MOCK_HMRC_EMAIL_HOSTNAME, + "POP3_PORT": MOCK_HMRC_EMAIL_POP3_PORT, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": MOCK_HMRC_EMAIL_USER, + "password": MOCK_HMRC_EMAIL_PASSWORD, + }, + }, +} + TIME_TESTS = env.bool("TIME_TESTS", default=False) LOCK_INTERVAL = env.float("LOCK_INTERVAL", default=120.0) @@ -238,10 +276,6 @@ DEFAULT_ENCODING = "iso-8859-1" -AZURE_AUTH_CLIENT_ID = env.str("AZURE_AUTH_CLIENT_ID") -AZURE_AUTH_CLIENT_SECRET = env.str("AZURE_AUTH_CLIENT_SECRET") -AZURE_AUTH_TENANT_ID = env.str("AZURE_AUTH_TENANT_ID") - SEND_REJECTED_EMAIL = env.bool("SEND_REJECTED_EMAIL", default=True) DEFAULT_AUTO_FIELD = "django.db.models.AutoField" diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index 75e7a89b..aa6a6d0f 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -22,8 +22,8 @@ ) from mail.libraries.mailbox_service import get_message_iterator from mail.models import Mail -from mail_servers.auth import BasicAuthentication, ModernAuthentication from mail_servers.servers import MailServer, smtp_send +from mail_servers.utils import get_mail_server logger = logging.getLogger(__name__) @@ -34,18 +34,9 @@ def get_spire_to_dit_mailserver() -> MailServer: These are licenceData and usageReply emails. They are processed by the service and sent to HMRC. """ - auth = ModernAuthentication( - user=settings.INCOMING_EMAIL_USER, - client_id=settings.AZURE_AUTH_CLIENT_ID, - client_secret=settings.AZURE_AUTH_CLIENT_SECRET, - tenant_id=settings.AZURE_AUTH_TENANT_ID, - ) + mail_server = get_mail_server("spire_to_dit") - return MailServer( - auth, - hostname=settings.INCOMING_EMAIL_HOSTNAME, - pop3_port=settings.INCOMING_EMAIL_POP3_PORT, - ) + return mail_server def get_hmrc_to_dit_mailserver() -> MailServer: @@ -54,31 +45,15 @@ def get_hmrc_to_dit_mailserver() -> MailServer: These are licenceReply and usageData emails """ - auth = ModernAuthentication( - user=settings.HMRC_TO_DIT_EMAIL_USER, - client_id=settings.AZURE_AUTH_CLIENT_ID, - client_secret=settings.AZURE_AUTH_CLIENT_SECRET, - tenant_id=settings.AZURE_AUTH_TENANT_ID, - ) + mail_server = get_mail_server("hmrc_to_dit") - return MailServer( - auth, - hostname=settings.HMRC_TO_DIT_EMAIL_HOSTNAME, - pop3_port=settings.HMRC_TO_DIT_EMAIL_POP3_PORT, - ) + return mail_server def get_mock_hmrc_mailserver() -> MailServer: - auth = BasicAuthentication( - user=settings.MOCK_HMRC_EMAIL_USER, - password=settings.MOCK_HMRC_EMAIL_PASSWORD, - ) - - return MailServer( - auth, - hostname=settings.MOCK_HMRC_EMAIL_HOSTNAME, - pop3_port=settings.MOCK_HMRC_EMAIL_POP3_PORT, - ) + mail_server = get_mail_server("mock") + + return mail_server def check_and_route_emails(): diff --git a/mail/tests/libraries/test_routing_controller.py b/mail/tests/libraries/test_routing_controller.py index 8bd3563c..f8124b73 100644 --- a/mail/tests/libraries/test_routing_controller.py +++ b/mail/tests/libraries/test_routing_controller.py @@ -13,17 +13,24 @@ class RoutingControllerTest(unittest.TestCase): @patch( - "mail.libraries.routing_controller.ModernAuthentication", + "mail_servers.auth.ModernAuthentication", spec=ModernAuthentication, ) - @patch("mail.libraries.routing_controller.MailServer") - @override_settings( # nosec - INCOMING_EMAIL_USER="incoming.email.user@example.com", - INCOMING_EMAIL_HOSTNAME="host.example.com", - INCOMING_EMAIL_POP3_PORT="123", - AZURE_AUTH_CLIENT_ID="azure-auth-client-id", - AZURE_AUTH_CLIENT_SECRET="azure-auth-client-secret", - AZURE_AUTH_TENANT_ID="azure-auth-tenant-id", + @patch("mail_servers.utils.MailServer") + @override_settings( + MAIL_SERVERS={ + "spire_to_dit": { + "HOSTNAME": "host.example.com", + "POP3_PORT": 123, + "AUTHENTICATION_CLASS": "mail_servers.auth.ModernAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "incoming.email.user@example.com", + "client_id": "azure-auth-client-id", + "client_secret": "azure-auth-client-secret", + "tenant_id": "azure-auth-tenant-id", + }, + }, + } ) def test_get_spire_to_dit_mailserver(self, mock_MailServer, mock_ModernAuthentication): spire_to_dit_mailserver = get_spire_to_dit_mailserver() @@ -37,23 +44,30 @@ def test_get_spire_to_dit_mailserver(self, mock_MailServer, mock_ModernAuthentic mock_MailServer.assert_called_with( mock_ModernAuthentication(), hostname="host.example.com", - pop3_port="123", + pop3_port=123, ) self.assertEqual(spire_to_dit_mailserver, mock_MailServer()) @patch( - "mail.libraries.routing_controller.ModernAuthentication", + "mail_servers.auth.ModernAuthentication", spec=ModernAuthentication, ) - @patch("mail.libraries.routing_controller.MailServer") - @override_settings( # nosec - HMRC_TO_DIT_EMAIL_USER="hmrc.to.dit.email.user@example.com", - HMRC_TO_DIT_EMAIL_HOSTNAME="host.example.com", - HMRC_TO_DIT_EMAIL_POP3_PORT="123", - AZURE_AUTH_CLIENT_ID="azure-auth-client-id", - AZURE_AUTH_CLIENT_SECRET="azure-auth-client-secret", - AZURE_AUTH_TENANT_ID="azure-auth-tenant-id", + @patch("mail_servers.utils.MailServer") + @override_settings( + MAIL_SERVERS={ + "hmrc_to_dit": { + "HOSTNAME": "host.example.com", + "POP3_PORT": 123, + "AUTHENTICATION_CLASS": "mail_servers.auth.ModernAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "hmrc.to.dit.email.user@example.com", + "client_id": "azure-auth-client-id", + "client_secret": "azure-auth-client-secret", + "tenant_id": "azure-auth-tenant-id", + }, + }, + } ) def test_get_hmrc_to_dit_mailserver(self, mock_MailServer, mock_ModernAuthentication): hmrc_to_dit_mailserver = get_hmrc_to_dit_mailserver() @@ -67,33 +81,40 @@ def test_get_hmrc_to_dit_mailserver(self, mock_MailServer, mock_ModernAuthentica mock_MailServer.assert_called_with( mock_ModernAuthentication(), hostname="host.example.com", - pop3_port="123", + pop3_port=123, ) self.assertEqual(hmrc_to_dit_mailserver, mock_MailServer()) @patch( - "mail.libraries.routing_controller.BasicAuthentication", + "mail_servers.auth.BasicAuthentication", spec=BasicAuthentication, ) - @patch("mail.libraries.routing_controller.MailServer") - @override_settings( # nosec - MOCK_HMRC_EMAIL_USER="mock.hmrc.email.user@example.com", - MOCK_HMRC_EMAIL_PASSWORD="shhh", - MOCK_HMRC_EMAIL_HOSTNAME="host.example.com", - MOCK_HMRC_EMAIL_POP3_PORT="123", + @patch("mail_servers.utils.MailServer") + @override_settings( + MAIL_SERVERS={ + "mock": { + "HOSTNAME": "host.example.com", + "POP3_PORT": 123, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "mock.hmrc.email.user@example.com", + "password": "shhh", + }, + }, + } ) def test_get_mock_hmrc_mailserver(self, mock_MailServer, mock_BasicAuthentication): mock_hmrc_mailserver = get_mock_hmrc_mailserver() - mock_BasicAuthentication.assert_called_with( # nosec + mock_BasicAuthentication.assert_called_with( user="mock.hmrc.email.user@example.com", password="shhh", ) mock_MailServer.assert_called_with( mock_BasicAuthentication(), hostname="host.example.com", - pop3_port="123", + pop3_port=123, ) self.assertEqual(mock_hmrc_mailserver, mock_MailServer()) diff --git a/mail_servers/tests/test_utils.py b/mail_servers/tests/test_utils.py new file mode 100644 index 00000000..be2ff06b --- /dev/null +++ b/mail_servers/tests/test_utils.py @@ -0,0 +1,30 @@ +from unittest.mock import Mock + +from django.test import SimpleTestCase, override_settings + +from mail_servers.auth import Authenticator +from mail_servers.utils import get_mail_server + +FakeAuth = Mock(spec=Authenticator) + + +class GetMailServerTests(SimpleTestCase): + @override_settings( + MAIL_SERVERS={ + "config": { + "HOSTNAME": "hostname", + "POP3_PORT": 12345, + "AUTHENTICATION_CLASS": "mail_servers.tests.test_utils.FakeAuth", + "AUTHENTICATION_OPTIONS": { + "first_arg": "something", + "second_arg": "something_else", + }, + }, + } + ) + def test_get_mail_server(self): + mail_server = get_mail_server("config") + mail_server.hostname = "hostname" + mail_server.pop3_port = 12345 + FakeAuth.assert_called_with(first_arg="something", second_arg="something_else") + self.assertEqual(mail_server.auth, FakeAuth(first_arg="something", second_arg="something_else")) diff --git a/mail_servers/utils.py b/mail_servers/utils.py new file mode 100644 index 00000000..5903915a --- /dev/null +++ b/mail_servers/utils.py @@ -0,0 +1,18 @@ +from django.conf import settings +from django.utils.module_loading import import_string + +from mail_servers.servers import MailServer + + +def get_mail_server(config_key): + config = settings.MAIL_SERVERS[config_key] + + Auth = import_string(config["AUTHENTICATION_CLASS"]) + auth = Auth(**config["AUTHENTICATION_OPTIONS"]) + mail_server = MailServer( + auth, + hostname=config["HOSTNAME"], + pop3_port=config["POP3_PORT"], + ) + + return mail_server From 2cb3741ce225ea7d311fc51328ff0cc06fda6117 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sat, 21 Dec 2024 21:08:57 +0000 Subject: [PATCH 06/38] Fix management command to check mailserver connections --- mail/management/commands/check_mailserver_connections.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mail/management/commands/check_mailserver_connections.py b/mail/management/commands/check_mailserver_connections.py index 6e1ea83a..0e8787f2 100644 --- a/mail/management/commands/check_mailserver_connections.py +++ b/mail/management/commands/check_mailserver_connections.py @@ -17,11 +17,8 @@ def handle(self, *args, **options): self.stdout.write(f"Checking {mailserver_to_check_factory.__name__}") mailserver = mailserver_to_check_factory() try: - pop3_connection = mailserver.connect_to_pop3() + with mailserver.connect_to_pop3() as pop3_connection: + self.stdout.write(pop3_connection.welcome.decode("ascii")) except poplib.error_proto as e: response, *_ = e.args self.stdout.write(self.style.ERROR(response)) - else: - self.stdout.write(pop3_connection.welcome.decode("ascii")) - finally: - mailserver.quit_pop3_connection() From 3ad7b34e28b368273e12bb0ba374952b58438df7 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sun, 22 Dec 2024 19:50:20 +0000 Subject: [PATCH 07/38] Move mailboxes code to separate app --- conf/settings.py | 1 + mail/admin.py | 4 +- mail/enums.py | 94 +----------------- mail/libraries/mailbox_service.py | 5 +- mail/libraries/routing_controller.py | 3 +- ...s_mailbox_delete_mailboxconfig_and_more.py | 27 +++++ mail/models.py | 32 +----- mailboxes/__init__.py | 0 mailboxes/apps.py | 6 ++ mailboxes/enums.py | 7 ++ mailboxes/migrations/0001_initial.py | 98 +++++++++++++++++++ mailboxes/migrations/__init__.py | 0 mailboxes/models.py | 38 +++++++ 13 files changed, 186 insertions(+), 129 deletions(-) create mode 100644 mail/migrations/0023_remove_mailreadstatus_mailbox_delete_mailboxconfig_and_more.py create mode 100644 mailboxes/__init__.py create mode 100644 mailboxes/apps.py create mode 100644 mailboxes/enums.py create mode 100644 mailboxes/migrations/0001_initial.py create mode 100644 mailboxes/migrations/__init__.py create mode 100644 mailboxes/models.py diff --git a/conf/settings.py b/conf/settings.py index dda0d83d..6d77e8d0 100644 --- a/conf/settings.py +++ b/conf/settings.py @@ -47,6 +47,7 @@ "django.contrib.messages", "django.contrib.staticfiles", "mail", + "mailboxes", "health_check", "health_check.db", "health_check.cache", diff --git a/mail/admin.py b/mail/admin.py index f8ce9ad6..aa435947 100644 --- a/mail/admin.py +++ b/mail/admin.py @@ -1,6 +1,6 @@ from django.contrib import admin -from mail.models import LicenceData, LicenceIdMapping, LicencePayload, Mail, MailboxConfig, MailReadStatus, UsageData +from mail.models import LicenceData, LicenceIdMapping, LicencePayload, Mail, UsageData class LicencePayloadInline(admin.TabularInline): @@ -30,8 +30,6 @@ class MailAdmin(admin.ModelAdmin): list_display = ["pk", "edi_filename", "status", "extract_type", "sent_at", "response_date"] -admin.site.register(MailboxConfig) -admin.site.register(MailReadStatus) admin.site.register(UsageData) admin.site.register(LicenceData, LicenceDataAdmin) admin.site.register(Mail, MailAdmin) diff --git a/mail/enums.py b/mail/enums.py index 4fe75d11..d8b49c35 100644 --- a/mail/enums.py +++ b/mail/enums.py @@ -1,91 +1,7 @@ import enum -from types import DynamicClassAttribute -from django.utils.functional import Promise, classproperty - - -# Backported from Django 4.x -# https://github.com/django/django/blob/9c19aff7c/django/db/models/enums.py /PS-IGNORE -class ChoicesMeta(enum.EnumMeta): - """A metaclass for creating a enum choices.""" - - def __new__(metacls, classname, bases, classdict, **kwds): - labels = [] - for key in classdict._member_names: - value = classdict[key] - if isinstance(value, (list, tuple)) and len(value) > 1 and isinstance(value[-1], (Promise, str)): - *value, label = value - value = tuple(value) - else: - label = key.replace("_", " ").title() - labels.append(label) - # Use dict.__setitem__() to suppress defenses against double - # assignment in enum's classdict. - dict.__setitem__(classdict, key, value) - cls = super().__new__(metacls, classname, bases, classdict, **kwds) - for member, label in zip(cls.__members__.values(), labels): - member._label_ = label - return enum.unique(cls) - - def __contains__(cls, member): - if not isinstance(member, enum.Enum): - # Allow non-enums to match against member values. - return any(x.value == member for x in cls) - return super().__contains__(member) - - @property - def names(cls): - empty = ["__empty__"] if hasattr(cls, "__empty__") else [] - return empty + [member.name for member in cls] - - @property - def choices(cls): - empty = [(None, cls.__empty__)] if hasattr(cls, "__empty__") else [] - return empty + [(member.value, member.label) for member in cls] - - @property - def labels(cls): - return [label for _, label in cls.choices] - - @property - def values(cls): - return [value for value, _ in cls.choices] - - -class Choices(enum.Enum, metaclass=ChoicesMeta): - """Class for creating enumerated choices.""" - - @DynamicClassAttribute - def label(self): - return self._label_ - - @property - def do_not_call_in_templates(self): - return True - - def __str__(self): - """ - Use value when cast to str, so that Choices set as model instance - attributes are rendered as expected in templates and similar contexts. - """ - return str(self.value) - - # A similar format was proposed for Python 3.10. - def __repr__(self): - return f"{self.__class__.__qualname__}.{self._name_}" - - -class IntegerChoices(int, Choices): - """Class for creating enumerated integer choices.""" - - pass - - -class TextChoices(str, Choices): - """Class for creating enumerated string choices.""" - - def _generate_next_value_(name, start, count, last_values): - return name +from django.db.models import IntegerChoices, TextChoices +from django.utils.functional import classproperty class LicenceActionEnum(TextChoices): @@ -213,12 +129,6 @@ def serializer_choices(cls): return list(cls.__members__.keys()) -class MailReadStatuses(TextChoices): - READ = "READ" - UNREAD = "UNREAD" - UNPROCESSABLE = "UNPROCESSABLE" - - class LicenceStatusEnum(TextChoices): OPEN = "open" EXHAUST = "exhaust" diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index 1e5d4e1c..2a129b2b 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -6,10 +6,11 @@ from django.conf import settings -from mail.enums import MailReadStatuses from mail.libraries.email_message_dto import EmailMessageDto from mail.libraries.helpers import to_mail_message_dto -from mail.models import Mail, MailboxConfig, MailReadStatus +from mail.models import Mail +from mailboxes.enums import MailReadStatuses +from mailboxes.models import MailboxConfig, MailReadStatus def is_from_valid_sender(msg_header, valid_addresses): diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index aa6a6d0f..f017c631 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -6,7 +6,7 @@ from django.utils import timezone from rest_framework.exceptions import ValidationError -from mail.enums import ExtractTypeEnum, MailReadStatuses, ReceptionStatusEnum, ReplyStatusEnum, SourceEnum +from mail.enums import ExtractTypeEnum, ReceptionStatusEnum, ReplyStatusEnum, SourceEnum from mail.libraries.builders import build_email_message from mail.libraries.data_processors import ( lock_db_for_sending_transaction, @@ -24,6 +24,7 @@ from mail.models import Mail from mail_servers.servers import MailServer, smtp_send from mail_servers.utils import get_mail_server +from mailboxes.enums import MailReadStatuses logger = logging.getLogger(__name__) diff --git a/mail/migrations/0023_remove_mailreadstatus_mailbox_delete_mailboxconfig_and_more.py b/mail/migrations/0023_remove_mailreadstatus_mailbox_delete_mailboxconfig_and_more.py new file mode 100644 index 00000000..02571126 --- /dev/null +++ b/mail/migrations/0023_remove_mailreadstatus_mailbox_delete_mailboxconfig_and_more.py @@ -0,0 +1,27 @@ +# Generated by Django 4.2.16 on 2024-12-21 22:01 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ("mail", "0022_alter_licencedata_licence_payloads"), + ] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.RemoveField( + model_name="mailreadstatus", + name="mailbox", + ), + migrations.DeleteModel( + name="MailboxConfig", + ), + migrations.DeleteModel( + name="MailReadStatus", + ), + ], + ), + ] diff --git a/mail/models.py b/mail/models.py index 7bb4226c..f61ae6df 100644 --- a/mail/models.py +++ b/mail/models.py @@ -7,16 +7,8 @@ from django.conf import settings from django.db import IntegrityError, models from django.utils import timezone -from model_utils.models import TimeStampedModel -from mail.enums import ( - ChiefSystemEnum, - ExtractTypeEnum, - LicenceActionEnum, - MailReadStatuses, - ReceptionStatusEnum, - SourceEnum, -) +from mail.enums import ChiefSystemEnum, ExtractTypeEnum, LicenceActionEnum, ReceptionStatusEnum, SourceEnum logger = logging.getLogger(__name__) @@ -198,25 +190,3 @@ class TransactionMapping(models.Model): line_number = models.PositiveIntegerField(null=True, blank=True) usage_transaction = models.CharField(null=False, blank=False, max_length=35) usage_data = models.ForeignKey(UsageData, on_delete=models.DO_NOTHING) - - -class MailboxConfig(TimeStampedModel): - username = models.TextField(null=False, blank=False, primary_key=True, help_text="Username of the POP3 mailbox") - - -class MailReadStatus(TimeStampedModel): - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - message_num = models.TextField( - default="", - help_text="Sequence number of the message as assigned by pop3 when the messages list is requested from the mailbox", - ) - message_id = models.TextField( - default=uuid.uuid4, - unique=True, - help_text="Unique Message-ID of the message that is retrieved from the message header", - ) - status = models.TextField(choices=MailReadStatuses.choices, default=MailReadStatuses.UNREAD, db_index=True) - mailbox = models.ForeignKey(MailboxConfig, on_delete=models.CASCADE) - - def __repr__(self): - return f"message_id={self.message_id} status={self.status}" diff --git a/mailboxes/__init__.py b/mailboxes/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mailboxes/apps.py b/mailboxes/apps.py new file mode 100644 index 00000000..4b199e58 --- /dev/null +++ b/mailboxes/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class MailboxesConfig(AppConfig): + default_auto_field = "django.db.models.BigAutoField" + name = "mailboxes" diff --git a/mailboxes/enums.py b/mailboxes/enums.py new file mode 100644 index 00000000..eaa32984 --- /dev/null +++ b/mailboxes/enums.py @@ -0,0 +1,7 @@ +from django.db.models import TextChoices + + +class MailReadStatuses(TextChoices): + READ = "READ" + UNREAD = "UNREAD" + UNPROCESSABLE = "UNPROCESSABLE" diff --git a/mailboxes/migrations/0001_initial.py b/mailboxes/migrations/0001_initial.py new file mode 100644 index 00000000..15ea0d4c --- /dev/null +++ b/mailboxes/migrations/0001_initial.py @@ -0,0 +1,98 @@ +# Generated by Django 4.2.16 on 2024-12-21 22:00 + +import uuid + +import django.db.models.deletion +import django.utils.timezone +import model_utils.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [("mail", "0023_remove_mailreadstatus_mailbox_delete_mailboxconfig_and_more")] + + operations = [ + migrations.SeparateDatabaseAndState( + state_operations=[ + migrations.CreateModel( + name="MailboxConfig", + fields=[ + ( + "created", + model_utils.fields.AutoCreatedField( + default=django.utils.timezone.now, editable=False, verbose_name="created" + ), + ), + ( + "modified", + model_utils.fields.AutoLastModifiedField( + default=django.utils.timezone.now, editable=False, verbose_name="modified" + ), + ), + ( + "username", + models.TextField( + help_text="Username of the POP3 mailbox", primary_key=True, serialize=False + ), + ), + ], + options={ + "db_table": "mail_mailboxconfig", + }, + ), + migrations.CreateModel( + name="MailReadStatus", + fields=[ + ( + "created", + model_utils.fields.AutoCreatedField( + default=django.utils.timezone.now, editable=False, verbose_name="created" + ), + ), + ( + "modified", + model_utils.fields.AutoLastModifiedField( + default=django.utils.timezone.now, editable=False, verbose_name="modified" + ), + ), + ("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ( + "message_num", + models.TextField( + default="", + help_text="Sequence number of the message as assigned by pop3 when the messages list is requested from the mailbox", + ), + ), + ( + "message_id", + models.TextField( + default=uuid.uuid4, + help_text="Unique Message-ID of the message that is retrieved from the message header", + unique=True, + ), + ), + ( + "status", + models.TextField( + choices=[("READ", "Read"), ("UNREAD", "Unread"), ("UNPROCESSABLE", "Unprocessable")], + db_index=True, + default="UNREAD", + ), + ), + ( + "mailbox", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, to="mailboxes.mailboxconfig" + ), + ), + ], + options={ + "db_table": "mail_mailreadstatus", + }, + ), + ], + ), + ] diff --git a/mailboxes/migrations/__init__.py b/mailboxes/migrations/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mailboxes/models.py b/mailboxes/models.py new file mode 100644 index 00000000..dd3a003a --- /dev/null +++ b/mailboxes/models.py @@ -0,0 +1,38 @@ +import uuid + +from django.db import models +from model_utils.models import TimeStampedModel + +from mailboxes.enums import MailReadStatuses + + +class MailboxConfig(TimeStampedModel): + username = models.TextField(null=False, blank=False, primary_key=True, help_text="Username of the POP3 mailbox") + + class Meta: + db_table = ( + "mail_mailboxconfig" # This was moved from another app and this makes the migrations backwards compatible + ) + + +class MailReadStatus(TimeStampedModel): + id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) + message_num = models.TextField( + default="", + help_text="Sequence number of the message as assigned by pop3 when the messages list is requested from the mailbox", + ) + message_id = models.TextField( + default=uuid.uuid4, + unique=True, + help_text="Unique Message-ID of the message that is retrieved from the message header", + ) + status = models.TextField(choices=MailReadStatuses.choices, default=MailReadStatuses.UNREAD, db_index=True) + mailbox = models.ForeignKey(MailboxConfig, on_delete=models.CASCADE) + + class Meta: + db_table = ( + "mail_mailreadstatus" # This was moved from another app and this makes the migrations backwards compatible + ) + + def __str__(self): + return f"{self.__class__.__name__}(message_id={self.message_id}, status={self.status})" From f19405f6ab2173938222dc7932e341e42cc4d044 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sun, 22 Dec 2024 20:21:29 +0000 Subject: [PATCH 08/38] Add str methods for mailbox models --- .coveragerc | 4 +++- mailboxes/models.py | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.coveragerc b/.coveragerc index f04f2346..7925660d 100644 --- a/.coveragerc +++ b/.coveragerc @@ -23,9 +23,11 @@ exclude_lines = pragma: no cover # Don't complain about missing debug-only code: - def __repr__ if self\.debug + def __repr__ + def __str__ + # Don't complain if tests don't hit defensive assertion code: raise AssertionError raise NotImplementedError diff --git a/mailboxes/models.py b/mailboxes/models.py index dd3a003a..4dfbd114 100644 --- a/mailboxes/models.py +++ b/mailboxes/models.py @@ -14,6 +14,9 @@ class Meta: "mail_mailboxconfig" # This was moved from another app and this makes the migrations backwards compatible ) + def __str__(self): + return f"username={self.username}" + class MailReadStatus(TimeStampedModel): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) @@ -35,4 +38,4 @@ class Meta: ) def __str__(self): - return f"{self.__class__.__name__}(message_id={self.message_id}, status={self.status})" + return f"message_id={self.message_id} status={self.status}" From 2c327981eb17eb392de61385cf2b36cdc4be2697 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sun, 22 Dec 2024 20:56:29 +0000 Subject: [PATCH 09/38] Move get_message_id function to mailboxes --- mail/libraries/mailbox_service.py | 60 +------------------ .../management/commands/mark_mails_as_read.py | 1 + mailboxes/utils.py | 52 ++++++++++++++++ mock_hmrc/handler.py | 2 +- 4 files changed, 55 insertions(+), 60 deletions(-) create mode 100644 mailboxes/utils.py diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index 2a129b2b..9e0304dd 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -1,6 +1,4 @@ import logging -from email.parser import BytesHeaderParser -from email.utils import parseaddr from poplib import POP3_SSL, error_proto from typing import Callable, Iterator, List, Tuple @@ -11,63 +9,7 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig, MailReadStatus - - -def is_from_valid_sender(msg_header, valid_addresses): - parser = BytesHeaderParser() - header = parser.parsebytes(b"\n".join(msg_header)) - - logging.info("Parsing address header From: %s", header["From"]) - name, from_address = parseaddr(str(header["From"])) - logging.info("Found from address %s, %s", name, from_address) - valid_addresses = [address.replace("From: ", "") for address in valid_addresses] - - return from_address in valid_addresses - - -def get_message_id(pop3_connection, listing_msg): - """ - Takes a single line from pop3 LIST command and extracts - the message num. Uses the message number further to extract header information - from which the actual Message-ID is extracted. - - :param pop3_connection: pop3 connection instance - :param listing_msg: a line returned from the pop3.list command, e.g. b"2 5353" - :return: the message-id and message_num extracted from the input, for the above example: b"2" - """ - msg_num = listing_msg.split()[0] - - # retrieves the header information - # 0 indicates the number of lines of message to be retrieved after the header - msg_header = pop3_connection.top(msg_num, 0) - - if not is_from_valid_sender(msg_header[1], [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): - logging.warning( - "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", - msg_num, - settings.SPIRE_FROM_ADDRESS, - settings.HMRC_TO_DIT_REPLY_ADDRESS, - ) - logging.debug("Mail was from %s", msg_header[1]) - return None, msg_num - - message_id = None - for index, item in enumerate(msg_header[1]): - hdr_item_fields = item.decode("utf-8").split(" ") - # message id is of the form b"Message-ID: <963d810e-c573-ef26-4ac0-151572b3524b@email-domail.co.uk>" - - if len(hdr_item_fields) == 2: - if hdr_item_fields[0].lower() == "message-id:": - value = hdr_item_fields[1].replace("<", "").replace(">", "") - message_id = value.split("@")[0] - elif len(hdr_item_fields) == 1: - if hdr_item_fields[0].lower() == "message-id:": - value = msg_header[1][index + 1].decode("utf-8") - value = value.replace("<", "").replace(">", "").strip(" ") - message_id = value.split("@")[0] - - logging.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) - return message_id, msg_num +from mailboxes.utils import get_message_id def get_read_messages(mailbox_config): diff --git a/mail/management/commands/mark_mails_as_read.py b/mail/management/commands/mark_mails_as_read.py index 82dd1ad5..456ec64b 100644 --- a/mail/management/commands/mark_mails_as_read.py +++ b/mail/management/commands/mark_mails_as_read.py @@ -4,6 +4,7 @@ from mail import enums, models from mail.libraries.mailbox_service import get_message_id from mail_servers.servers import MailServer +from mailboxes.utils import get_message_id class Command(BaseCommand): diff --git a/mailboxes/utils.py b/mailboxes/utils.py new file mode 100644 index 00000000..6eca463b --- /dev/null +++ b/mailboxes/utils.py @@ -0,0 +1,52 @@ +import logging + +from django.conf import settings + +logger = logging.getLogger(__name__) + + +def get_message_id(pop3_connection, listing_msg): + """ + Takes a single line from pop3 LIST command and extracts + the message num. Uses the message number further to extract header information + from which the actual Message-ID is extracted. + + :param pop3_connection: pop3 connection instance + :param listing_msg: a line returned from the pop3.list command, e.g. b"2 5353" + :return: the message-id and message_num extracted from the input, for the above example: b"2" + """ + msg_num = listing_msg.split()[0] + + # retrieves the header information + # 0 indicates the number of lines of message to be retrieved after the header + msg_header = pop3_connection.top(msg_num, 0) + + spire_from_address = settings.SPIRE_FROM_ADDRESS.encode("utf-8") + hmrc_dit_reply_address = settings.HMRC_TO_DIT_REPLY_ADDRESS.encode("utf-8") + + if spire_from_address not in msg_header[1] and hmrc_dit_reply_address not in msg_header[1]: + logger.warning( + "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", + msg_num, + spire_from_address, + hmrc_dit_reply_address, + ) + return None, msg_num + + message_id = None + for index, item in enumerate(msg_header[1]): + hdr_item_fields = item.decode("utf-8").split(" ") + # message id is of the form b"Message-ID: <963d810e-c573-ef26-4ac0-151572b3524b@email-domail.co.uk>" /PS-IGNORE + + if len(hdr_item_fields) == 2: + if hdr_item_fields[0].lower() == "message-id:": + value = hdr_item_fields[1].replace("<", "").replace(">", "") + message_id = value.split("@")[0] + elif len(hdr_item_fields) == 1: + if hdr_item_fields[0].lower() == "message-id:": + value = msg_header[1][index + 1].decode("utf-8") + value = value.replace("<", "").replace(">", "").strip(" ") + message_id = value.split("@")[0] + + logging.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) + return message_id, msg_num diff --git a/mock_hmrc/handler.py b/mock_hmrc/handler.py index 46aa424b..c7a96215 100644 --- a/mock_hmrc/handler.py +++ b/mock_hmrc/handler.py @@ -3,8 +3,8 @@ from django.conf import settings from mail.libraries.helpers import to_hmrc_mail_message_dto -from mail.libraries.mailbox_service import get_message_id from mail.libraries.routing_controller import get_mock_hmrc_mailserver +from mailboxes.utils import get_message_id from mock_hmrc import enums, models from mock_hmrc.data_processors import save_hmrc_email_message_data, send_reply From 8901195df0f202d93f8fd0958d3855db5bae74ce Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sun, 22 Dec 2024 22:27:47 +0000 Subject: [PATCH 10/38] Extract out utils for getting headers and ids from emails --- mail/libraries/mailbox_service.py | 4 +- .../management/commands/mark_mails_as_read.py | 7 ++- mailboxes/tests/__init__.py | 0 mailboxes/tests/test_utils.py | 59 +++++++++++++++++++ mailboxes/utils.py | 41 +++++-------- 5 files changed, 80 insertions(+), 31 deletions(-) create mode 100644 mailboxes/tests/__init__.py create mode 100644 mailboxes/tests/test_utils.py diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index 9e0304dd..d2dc5708 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -9,7 +9,7 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig, MailReadStatus -from mailboxes.utils import get_message_id +from mailboxes.utils import get_message_header, get_message_id def get_read_messages(mailbox_config): @@ -33,7 +33,7 @@ def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[T # The mails is a list of message number and size - message number is an increasing value so the # latest emails will always be at the end. mail_message_ids = [ - get_message_id(pop3_connection, m.decode(settings.DEFAULT_ENCODING)) + get_message_id(*get_message_header(pop3_connection, m.decode(settings.DEFAULT_ENCODING))) for m in mails[-incoming_email_check_limit:] ] diff --git a/mail/management/commands/mark_mails_as_read.py b/mail/management/commands/mark_mails_as_read.py index 456ec64b..7b951e9a 100644 --- a/mail/management/commands/mark_mails_as_read.py +++ b/mail/management/commands/mark_mails_as_read.py @@ -2,9 +2,8 @@ from django.core.management.base import BaseCommand from mail import enums, models -from mail.libraries.mailbox_service import get_message_id from mail_servers.servers import MailServer -from mailboxes.utils import get_message_id +from mailboxes.utils import get_message_header, get_message_id class Command(BaseCommand): @@ -34,7 +33,9 @@ def handle(self, *args, **options): _, mails, _ = pop3_connection.list() self.stdout.write(self.style.SUCCESS(f"Found {len(mails)} in the inbox")) - mail_message_ids = [get_message_id(pop3_connection, m.decode(settings.DEFAULT_ENCODING)) for m in mails] + mail_message_ids = [ + get_message_id(*get_message_header(pop3_connection, m.decode(settings.DEFAULT_ENCODING))) for m in mails + ] self.stdout.write( self.style.SUCCESS(f"List of Message-Id and message numbers for existing mails:\n{mail_message_ids}") ) diff --git a/mailboxes/tests/__init__.py b/mailboxes/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py new file mode 100644 index 00000000..f7dbb6fd --- /dev/null +++ b/mailboxes/tests/test_utils.py @@ -0,0 +1,59 @@ +from email.header import Header +from email.message import Message +from unittest.mock import MagicMock + +from django.test import SimpleTestCase, override_settings + +from mailboxes.utils import get_message_header, get_message_id + + +class GetMessageHeaderTests(SimpleTestCase): + def test_get_message_header(self): + msg = Message() + header = Header("value") + msg["header"] = header + + pop3_connection = MagicMock() + pop3_connection().top.return_value = MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() + + returned_header, msg_num = get_message_header(pop3_connection(), "4 12345") + pop3_connection().top.assert_called_with("4", 0) + self.assertIsInstance(returned_header, Message) + self.assertEqual(returned_header["header"], "value") + self.assertEqual(msg_num, "4") + + +class GetMessageId(SimpleTestCase): + @override_settings( + SPIRE_FROM_ADDRESS="spire.from.address@example.com", # /PS-IGNORE + HMRC_TO_DIT_REPLY_ADDRESS="hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + ) + def test_get_message_id_not_from_valid_email(self): + message = Message() + from_header = Header("from@example.com") # /PS-IGNORE + message["From"] = from_header + reply_to_header = Header("reply-to@example.com") # /PS-IGNORE + message["Reply-To"] = reply_to_header + + message_id, message_number = get_message_id(message, "4") + + self.assertIsNone(message_id) + self.assertEqual(message_number, "4") + + @override_settings( + SPIRE_FROM_ADDRESS="spire.from.address@example.com", # /PS-IGNORE + HMRC_TO_DIT_REPLY_ADDRESS="hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + ) + def test_get_message_id_not_from_valid_email(self): + message = Message() + from_header = Header("Spire ") # /PS-IGNORE + message["From"] = from_header + reply_to_header = Header("HMRC ") # /PS-IGNORE + message["Reply-To"] = reply_to_header + message_id_header = Header("<123456@example.com>") # /PS-IGNORE + message["Message-ID"] = message_id_header + + message_id, message_number = get_message_id(message, "4") + + self.assertEqual(message_id, "123456") + self.assertEqual(message_number, "4") diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 6eca463b..a5152d44 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -1,30 +1,31 @@ import logging +from email.headerregistry import Address +from email.parser import BytesHeaderParser +from email.utils import parseaddr from django.conf import settings logger = logging.getLogger(__name__) -def get_message_id(pop3_connection, listing_msg): - """ - Takes a single line from pop3 LIST command and extracts - the message num. Uses the message number further to extract header information - from which the actual Message-ID is extracted. - - :param pop3_connection: pop3 connection instance - :param listing_msg: a line returned from the pop3.list command, e.g. b"2 5353" - :return: the message-id and message_num extracted from the input, for the above example: b"2" - """ +def get_message_header(pop3_connection, listing_msg): msg_num = listing_msg.split()[0] # retrieves the header information # 0 indicates the number of lines of message to be retrieved after the header - msg_header = pop3_connection.top(msg_num, 0) + _, msg_header, _ = pop3_connection.top(msg_num, 0) + + parser = BytesHeaderParser() + header = parser.parsebytes(b"".join(msg_header)) + + return header, msg_num + +def get_message_id(msg_header, msg_num): spire_from_address = settings.SPIRE_FROM_ADDRESS.encode("utf-8") hmrc_dit_reply_address = settings.HMRC_TO_DIT_REPLY_ADDRESS.encode("utf-8") - if spire_from_address not in msg_header[1] and hmrc_dit_reply_address not in msg_header[1]: + if spire_from_address not in msg_header.as_bytes() and hmrc_dit_reply_address not in msg_header.as_bytes(): logger.warning( "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", msg_num, @@ -33,20 +34,8 @@ def get_message_id(pop3_connection, listing_msg): ) return None, msg_num - message_id = None - for index, item in enumerate(msg_header[1]): - hdr_item_fields = item.decode("utf-8").split(" ") - # message id is of the form b"Message-ID: <963d810e-c573-ef26-4ac0-151572b3524b@email-domail.co.uk>" /PS-IGNORE - - if len(hdr_item_fields) == 2: - if hdr_item_fields[0].lower() == "message-id:": - value = hdr_item_fields[1].replace("<", "").replace(">", "") - message_id = value.split("@")[0] - elif len(hdr_item_fields) == 1: - if hdr_item_fields[0].lower() == "message-id:": - value = msg_header[1][index + 1].decode("utf-8") - value = value.replace("<", "").replace(">", "").strip(" ") - message_id = value.split("@")[0] + _, message_id = parseaddr(str(msg_header["message-id"])) + message_id = Address(addr_spec=message_id).username logging.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) return message_id, msg_num From 4588b5ee23f35ebb0ceea89d163b416bf7aa3d68 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Sun, 22 Dec 2024 22:56:12 +0000 Subject: [PATCH 11/38] Change API for mailbox functions to separate out responsibilities --- mail/libraries/mailbox_service.py | 42 +++++++++++++++-------- mailboxes/tests/test_utils.py | 55 ++++++++++++++++++------------- mailboxes/utils.py | 31 ++++++----------- 3 files changed, 71 insertions(+), 57 deletions(-) diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index d2dc5708..b7386af9 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -9,7 +9,9 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig, MailReadStatus -from mailboxes.utils import get_message_header, get_message_id +from mailboxes.utils import get_message_header, get_message_id, is_from_valid_sender + +logger = logging.getLogger(__name__) def get_read_messages(mailbox_config): @@ -28,22 +30,34 @@ def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[T incoming_email_check_limit = settings.INCOMING_EMAIL_CHECK_LIMIT # Check only the emails specified in the setting - # Since we don't delete emails from these mailboxes the total number can be very high over a perio + # Since we don't delete emails from these mailboxes the total number can be very high over a period of time # and increases the processing time. # The mails is a list of message number and size - message number is an increasing value so the # latest emails will always be at the end. - mail_message_ids = [ - get_message_id(*get_message_header(pop3_connection, m.decode(settings.DEFAULT_ENCODING))) - for m in mails[-incoming_email_check_limit:] - ] + mail_message_ids = [] + for m in mails[-incoming_email_check_limit:]: + listing_msg = m.decode(settings.DEFAULT_ENCODING) + msg_num = listing_msg.split()[0] + message_header = get_message_header(pop3_connection, msg_num) + if not is_from_valid_sender(message_header, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): + logger.warning( + "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", + msg_num, + settings.SPIRE_FROM_ADDRESS, + settings.HMRC_TO_DIT_REPLY_ADDRESS, + ) + continue + message_id = get_message_id(message_header) + logger.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) + mail_message_ids.append(message_id, msg_num) # these are mailbox message ids we've seen before read_messages = get_read_messages(mailbox_config) - logging.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) + logger.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) for message_id, message_num in mail_message_ids: # only return messages we haven't seen before - if message_id is not None and message_id not in read_messages: + if message_id not in read_messages: read_status, _ = MailReadStatus.objects.get_or_create( message_id=message_id, message_num=message_num, mailbox=mailbox_config ) @@ -52,7 +66,7 @@ def mark_status(status): """ :param status: A choice from `MailReadStatuses.choices` """ - logging.info( + logger.info( "Marking message_id %s with message_num %s from %s as %s", message_id, message_num, @@ -64,14 +78,14 @@ def mark_status(status): try: m = pop3_connection.retr(message_num) - logging.info( + logger.info( "Retrieved message_id %s with message_num %s from %s", message_id, message_num, read_status.mailbox.username, ) except error_proto as err: - logging.error( + logger.error( "Unable to RETR message num %s with Message-ID %s in %s: %s", message_num, message_id, @@ -84,7 +98,7 @@ def mark_status(status): try: mail_message = to_mail_message_dto(m) except ValueError as ve: - logging.error( + logger.error( "Unable to convert message num %s with Message-Id %s to DTO in %s: %s", message_num, message_id, @@ -102,8 +116,8 @@ def find_mail_of(extract_types: List[str], reception_status: str) -> Mail or Non try: mail = Mail.objects.get(status=reception_status, extract_type__in=extract_types) except Mail.DoesNotExist: - logging.warning("Can not find any mail in [%s] of extract type [%s]", reception_status, extract_types) + logger.warning("Can not find any mail in [%s] of extract type [%s]", reception_status, extract_types) return - logging.info("Found mail in [%s] of extract type [%s]", reception_status, extract_types) + logger.info("Found mail in [%s] of extract type [%s]", reception_status, extract_types) return mail diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index f7dbb6fd..8859b0c9 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -3,8 +3,9 @@ from unittest.mock import MagicMock from django.test import SimpleTestCase, override_settings +from parameterized import parameterized -from mailboxes.utils import get_message_header, get_message_id +from mailboxes.utils import get_message_header, get_message_id, is_from_valid_sender class GetMessageHeaderTests(SimpleTestCase): @@ -16,11 +17,10 @@ def test_get_message_header(self): pop3_connection = MagicMock() pop3_connection().top.return_value = MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() - returned_header, msg_num = get_message_header(pop3_connection(), "4 12345") + returned_header = get_message_header(pop3_connection(), "4") pop3_connection().top.assert_called_with("4", 0) self.assertIsInstance(returned_header, Message) self.assertEqual(returned_header["header"], "value") - self.assertEqual(msg_num, "4") class GetMessageId(SimpleTestCase): @@ -30,30 +30,41 @@ class GetMessageId(SimpleTestCase): ) def test_get_message_id_not_from_valid_email(self): message = Message() - from_header = Header("from@example.com") # /PS-IGNORE - message["From"] = from_header - reply_to_header = Header("reply-to@example.com") # /PS-IGNORE - message["Reply-To"] = reply_to_header + message_id_header = Header("<123456@example.com>") # /PS-IGNORE + message["Message-ID"] = message_id_header + + message_id = get_message_id(message) - message_id, message_number = get_message_id(message, "4") + self.assertEqual(message_id, "123456") - self.assertIsNone(message_id) - self.assertEqual(message_number, "4") - @override_settings( - SPIRE_FROM_ADDRESS="spire.from.address@example.com", # /PS-IGNORE - HMRC_TO_DIT_REPLY_ADDRESS="hmrc.to.dit.reply.address@example.com", # /PS-IGNORE +class IsFromValidSenderTests(SimpleTestCase): + @parameterized.expand( + [ + "From: spire.from.address@example.com", # /PS-IGNORE + "From: hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + "spire.from.address@example.com", # /PS-IGNORE + "hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + ] ) - def test_get_message_id_not_from_valid_email(self): + def test_is_from_valid_sender(self, valid_address): message = Message() - from_header = Header("Spire ") # /PS-IGNORE + from_header = Header(f"Sender <{valid_address.replace('From: ', '')}>") # /PS-IGNORE message["From"] = from_header - reply_to_header = Header("HMRC ") # /PS-IGNORE - message["Reply-To"] = reply_to_header - message_id_header = Header("<123456@example.com>") # /PS-IGNORE - message["Message-ID"] = message_id_header - message_id, message_number = get_message_id(message, "4") + self.assertTrue(is_from_valid_sender(message, [valid_address])) - self.assertEqual(message_id, "123456") - self.assertEqual(message_number, "4") + @parameterized.expand( + [ + "From: spire.from.address@example.com", # /PS-IGNORE + "From: hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + "spire.from.address@example.com", # /PS-IGNORE + "hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + ] + ) + def test_is_from_invalid_sender(self, valid_address): + message = Message() + from_header = Header("Invalid ") # /PS-IGNORE + message["From"] = from_header + + self.assertFalse(is_from_valid_sender(message, [valid_address])) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index a5152d44..e5f3c35c 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -3,14 +3,10 @@ from email.parser import BytesHeaderParser from email.utils import parseaddr -from django.conf import settings - logger = logging.getLogger(__name__) -def get_message_header(pop3_connection, listing_msg): - msg_num = listing_msg.split()[0] - +def get_message_header(pop3_connection, msg_num): # retrieves the header information # 0 indicates the number of lines of message to be retrieved after the header _, msg_header, _ = pop3_connection.top(msg_num, 0) @@ -18,24 +14,17 @@ def get_message_header(pop3_connection, listing_msg): parser = BytesHeaderParser() header = parser.parsebytes(b"".join(msg_header)) - return header, msg_num - + return header -def get_message_id(msg_header, msg_num): - spire_from_address = settings.SPIRE_FROM_ADDRESS.encode("utf-8") - hmrc_dit_reply_address = settings.HMRC_TO_DIT_REPLY_ADDRESS.encode("utf-8") - - if spire_from_address not in msg_header.as_bytes() and hmrc_dit_reply_address not in msg_header.as_bytes(): - logger.warning( - "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", - msg_num, - spire_from_address, - hmrc_dit_reply_address, - ) - return None, msg_num +def get_message_id(msg_header): _, message_id = parseaddr(str(msg_header["message-id"])) message_id = Address(addr_spec=message_id).username + return message_id + + +def is_from_valid_sender(msg_header, valid_addresses): + _, from_address = parseaddr(str(msg_header["From"])) + valid_addresses = [address.replace("From: ", "") for address in valid_addresses] - logging.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) - return message_id, msg_num + return from_address in valid_addresses From 6da0cd41be3aabe086d5ea83783bff658d550c6c Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 11:22:27 +0000 Subject: [PATCH 12/38] Pull out utility for getting message number --- mail/libraries/mailbox_service.py | 5 ++--- mailboxes/tests/test_utils.py | 9 +++++++-- mailboxes/utils.py | 8 ++++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index b7386af9..b0811b46 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -9,7 +9,7 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig, MailReadStatus -from mailboxes.utils import get_message_header, get_message_id, is_from_valid_sender +from mailboxes.utils import get_message_header, get_message_id, get_message_number, is_from_valid_sender logger = logging.getLogger(__name__) @@ -36,8 +36,7 @@ def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[T # latest emails will always be at the end. mail_message_ids = [] for m in mails[-incoming_email_check_limit:]: - listing_msg = m.decode(settings.DEFAULT_ENCODING) - msg_num = listing_msg.split()[0] + msg_num = get_message_number(m) message_header = get_message_header(pop3_connection, msg_num) if not is_from_valid_sender(message_header, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): logger.warning( diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 8859b0c9..3421f4ed 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -5,7 +5,7 @@ from django.test import SimpleTestCase, override_settings from parameterized import parameterized -from mailboxes.utils import get_message_header, get_message_id, is_from_valid_sender +from mailboxes.utils import get_message_header, get_message_id, get_message_number, is_from_valid_sender class GetMessageHeaderTests(SimpleTestCase): @@ -23,7 +23,7 @@ def test_get_message_header(self): self.assertEqual(returned_header["header"], "value") -class GetMessageId(SimpleTestCase): +class GetMessageIdTests(SimpleTestCase): @override_settings( SPIRE_FROM_ADDRESS="spire.from.address@example.com", # /PS-IGNORE HMRC_TO_DIT_REPLY_ADDRESS="hmrc.to.dit.reply.address@example.com", # /PS-IGNORE @@ -38,6 +38,11 @@ def test_get_message_id_not_from_valid_email(self): self.assertEqual(message_id, "123456") +class GetMessageNumberTests(SimpleTestCase): + def test_get_message_number(self): + self.assertEqual(get_message_number(b"22 12345"), "22") + + class IsFromValidSenderTests(SimpleTestCase): @parameterized.expand( [ diff --git a/mailboxes/utils.py b/mailboxes/utils.py index e5f3c35c..98daccaf 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -3,9 +3,17 @@ from email.parser import BytesHeaderParser from email.utils import parseaddr +from django.conf import settings + logger = logging.getLogger(__name__) +def get_message_number(listing_message): + listing_msg = listing_message.decode(settings.DEFAULT_ENCODING) + msg_num, _, _ = listing_msg.partition(" ") + return msg_num + + def get_message_header(pop3_connection, msg_num): # retrieves the header information # 0 indicates the number of lines of message to be retrieved after the header From 87de06c75516cbf9a7348984b5b7e682ea255c78 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 11:51:59 +0000 Subject: [PATCH 13/38] Move getting read message function into utils --- mail/libraries/mailbox_service.py | 17 ++++----- mailboxes/tests/factories.py | 15 ++++++++ mailboxes/tests/test_utils.py | 57 +++++++++++++++++++++++++++++-- mailboxes/utils.py | 12 +++++++ 4 files changed, 89 insertions(+), 12 deletions(-) create mode 100644 mailboxes/tests/factories.py diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index b0811b46..ea08ea0d 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -9,20 +9,17 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig, MailReadStatus -from mailboxes.utils import get_message_header, get_message_id, get_message_number, is_from_valid_sender +from mailboxes.utils import ( + get_message_header, + get_message_id, + get_message_number, + get_read_messages, + is_from_valid_sender, +) logger = logging.getLogger(__name__) -def get_read_messages(mailbox_config): - return [ - str(m.message_id) - for m in MailReadStatus.objects.filter( - mailbox=mailbox_config, status__in=[MailReadStatuses.READ, MailReadStatuses.UNPROCESSABLE] - ) - ] - - def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[Tuple[EmailMessageDto, Callable]]: mails: list _, mails, _ = pop3_connection.list() diff --git a/mailboxes/tests/factories.py b/mailboxes/tests/factories.py new file mode 100644 index 00000000..89339854 --- /dev/null +++ b/mailboxes/tests/factories.py @@ -0,0 +1,15 @@ +import factory + +from mailboxes.models import MailboxConfig, MailReadStatus + + +class MailboxConfigFactory(factory.django.DjangoModelFactory): + class Meta: + model = MailboxConfig + + username = factory.Faker("email") + + +class MailReadStatusFactory(factory.django.DjangoModelFactory): + class Meta: + model = MailReadStatus diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 3421f4ed..c33a2fde 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -2,10 +2,18 @@ from email.message import Message from unittest.mock import MagicMock -from django.test import SimpleTestCase, override_settings +from django.test import SimpleTestCase, TestCase, override_settings from parameterized import parameterized -from mailboxes.utils import get_message_header, get_message_id, get_message_number, is_from_valid_sender +from mailboxes.enums import MailReadStatuses +from mailboxes.tests.factories import MailboxConfigFactory, MailReadStatusFactory +from mailboxes.utils import ( + get_message_header, + get_message_id, + get_message_number, + get_read_messages, + is_from_valid_sender, +) class GetMessageHeaderTests(SimpleTestCase): @@ -43,6 +51,51 @@ def test_get_message_number(self): self.assertEqual(get_message_number(b"22 12345"), "22") +class GetReadMessagesTests(TestCase): + def test_get_read_messages(self): + a_mailbox = MailboxConfigFactory() + MailReadStatusFactory( + mailbox=a_mailbox, + status=MailReadStatuses.READ, + message_id="a-read", + ) + MailReadStatusFactory( + mailbox=a_mailbox, + status=MailReadStatuses.UNREAD, + message_id="a-unread", + ) + MailReadStatusFactory( + mailbox=a_mailbox, + status=MailReadStatuses.UNPROCESSABLE, + message_id="a-unprocessable", + ) + self.assertEqual( + get_read_messages(a_mailbox), + ["a-read", "a-unprocessable"], + ) + + b_mailbox = MailboxConfigFactory() + MailReadStatusFactory( + mailbox=b_mailbox, + status=MailReadStatuses.READ, + message_id="b-read", + ) + MailReadStatusFactory( + mailbox=b_mailbox, + status=MailReadStatuses.UNREAD, + message_id="b-unread", + ) + MailReadStatusFactory( + mailbox=b_mailbox, + status=MailReadStatuses.UNPROCESSABLE, + message_id="b-unprocessable", + ) + self.assertEqual( + get_read_messages(b_mailbox), + ["b-read", "b-unprocessable"], + ) + + class IsFromValidSenderTests(SimpleTestCase): @parameterized.expand( [ diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 98daccaf..e0319350 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -5,6 +5,9 @@ from django.conf import settings +from mailboxes.enums import MailReadStatuses +from mailboxes.models import MailReadStatus + logger = logging.getLogger(__name__) @@ -31,6 +34,15 @@ def get_message_id(msg_header): return message_id +def get_read_messages(mailbox_config): + return [ + str(m.message_id) + for m in MailReadStatus.objects.filter( + mailbox=mailbox_config, status__in=[MailReadStatuses.READ, MailReadStatuses.UNPROCESSABLE] + ) + ] + + def is_from_valid_sender(msg_header, valid_addresses): _, from_address = parseaddr(str(msg_header["From"])) valid_addresses = [address.replace("From: ", "") for address in valid_addresses] From 994ab1e97136d2ff3e690cee4ebe199a1aa016d2 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 11:54:53 +0000 Subject: [PATCH 14/38] Return a set of message ids --- mailboxes/tests/test_utils.py | 4 ++-- mailboxes/utils.py | 15 ++++++++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index c33a2fde..fe852df3 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -71,7 +71,7 @@ def test_get_read_messages(self): ) self.assertEqual( get_read_messages(a_mailbox), - ["a-read", "a-unprocessable"], + {"a-read", "a-unprocessable"}, ) b_mailbox = MailboxConfigFactory() @@ -92,7 +92,7 @@ def test_get_read_messages(self): ) self.assertEqual( get_read_messages(b_mailbox), - ["b-read", "b-unprocessable"], + {"b-read", "b-unprocessable"}, ) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index e0319350..963f3174 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -6,7 +6,6 @@ from django.conf import settings from mailboxes.enums import MailReadStatuses -from mailboxes.models import MailReadStatus logger = logging.getLogger(__name__) @@ -35,12 +34,14 @@ def get_message_id(msg_header): def get_read_messages(mailbox_config): - return [ - str(m.message_id) - for m in MailReadStatus.objects.filter( - mailbox=mailbox_config, status__in=[MailReadStatuses.READ, MailReadStatuses.UNPROCESSABLE] - ) - ] + mail_read_statuses = mailbox_config.mailreadstatus_set.filter( + status__in=[ + MailReadStatuses.READ, + MailReadStatuses.UNPROCESSABLE, + ] + ) + read_message_ids = set(mail_read_statuses.values_list("message_id", flat=True)) + return read_message_ids def is_from_valid_sender(msg_header, valid_addresses): From d3cf6b3602bd3d218a68fcdb5ceb1cbeb4423730 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 13:24:55 +0000 Subject: [PATCH 15/38] Set reverse relation name for mail read status --- .../0002_alter_mailreadstatus_mailbox.py | 23 +++++++++++++++++++ mailboxes/models.py | 6 ++++- mailboxes/utils.py | 2 +- 3 files changed, 29 insertions(+), 2 deletions(-) create mode 100644 mailboxes/migrations/0002_alter_mailreadstatus_mailbox.py diff --git a/mailboxes/migrations/0002_alter_mailreadstatus_mailbox.py b/mailboxes/migrations/0002_alter_mailreadstatus_mailbox.py new file mode 100644 index 00000000..bd08b016 --- /dev/null +++ b/mailboxes/migrations/0002_alter_mailreadstatus_mailbox.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.16 on 2024-12-23 11:58 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mailboxes", "0001_initial"), + ] + + operations = [ + migrations.AlterField( + model_name="mailreadstatus", + name="mailbox", + field=models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="mail_read_statuses", + to="mailboxes.mailboxconfig", + ), + ), + ] diff --git a/mailboxes/models.py b/mailboxes/models.py index 4dfbd114..7ee9d8f0 100644 --- a/mailboxes/models.py +++ b/mailboxes/models.py @@ -30,7 +30,11 @@ class MailReadStatus(TimeStampedModel): help_text="Unique Message-ID of the message that is retrieved from the message header", ) status = models.TextField(choices=MailReadStatuses.choices, default=MailReadStatuses.UNREAD, db_index=True) - mailbox = models.ForeignKey(MailboxConfig, on_delete=models.CASCADE) + mailbox = models.ForeignKey( + MailboxConfig, + on_delete=models.CASCADE, + related_name="mail_read_statuses", + ) class Meta: db_table = ( diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 963f3174..f2a314f9 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -34,7 +34,7 @@ def get_message_id(msg_header): def get_read_messages(mailbox_config): - mail_read_statuses = mailbox_config.mailreadstatus_set.filter( + mail_read_statuses = mailbox_config.mail_read_statuses.filter( status__in=[ MailReadStatuses.READ, MailReadStatuses.UNPROCESSABLE, From 73740b3781d18a70c5394ea89b954383c052fa58 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 13:29:47 +0000 Subject: [PATCH 16/38] Jump out of email loop earlier if message id has already been read --- mail/libraries/mailbox_service.py | 107 +++++++++++++++--------------- 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index ea08ea0d..e4dd82cd 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -8,7 +8,7 @@ from mail.libraries.helpers import to_mail_message_dto from mail.models import Mail from mailboxes.enums import MailReadStatuses -from mailboxes.models import MailboxConfig, MailReadStatus +from mailboxes.models import MailboxConfig from mailboxes.utils import ( get_message_header, get_message_id, @@ -52,60 +52,63 @@ def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[T logger.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) for message_id, message_num in mail_message_ids: + if message_id in read_messages: + continue + # only return messages we haven't seen before - if message_id not in read_messages: - read_status, _ = MailReadStatus.objects.get_or_create( - message_id=message_id, message_num=message_num, mailbox=mailbox_config + read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + message_id=message_id, + message_num=message_num, + ) + + def mark_status(status): + """ + :param status: A choice from `MailReadStatuses.choices` + """ + logger.info( + "Marking message_id %s with message_num %s from %s as %s", + message_id, + message_num, + read_status.mailbox, + status, + ) + read_status.status = status + read_status.save() + + try: + m = pop3_connection.retr(message_num) + logger.info( + "Retrieved message_id %s with message_num %s from %s", + message_id, + message_num, + read_status.mailbox, ) + except error_proto as err: + logger.error( + "Unable to RETR message num %s with Message-ID %s in %s: %s", + message_num, + message_id, + mailbox_config, + err, + exc_info=True, + ) + continue + + try: + mail_message = to_mail_message_dto(m) + except ValueError as ve: + logger.error( + "Unable to convert message num %s with Message-Id %s to DTO in %s: %s", + message_num, + message_id, + mailbox_config, + ve, + exc_info=True, + ) + mark_status(MailReadStatuses.UNPROCESSABLE) + continue - def mark_status(status): - """ - :param status: A choice from `MailReadStatuses.choices` - """ - logger.info( - "Marking message_id %s with message_num %s from %s as %s", - message_id, - message_num, - read_status.mailbox.username, - status, - ) - read_status.status = status - read_status.save() - - try: - m = pop3_connection.retr(message_num) - logger.info( - "Retrieved message_id %s with message_num %s from %s", - message_id, - message_num, - read_status.mailbox.username, - ) - except error_proto as err: - logger.error( - "Unable to RETR message num %s with Message-ID %s in %s: %s", - message_num, - message_id, - mailbox_config, - err, - exc_info=True, - ) - continue - - try: - mail_message = to_mail_message_dto(m) - except ValueError as ve: - logger.error( - "Unable to convert message num %s with Message-Id %s to DTO in %s: %s", - message_num, - message_id, - mailbox_config, - ve, - exc_info=True, - ) - mark_status(MailReadStatuses.UNPROCESSABLE) - continue - - yield mail_message, mark_status + yield mail_message, mark_status def find_mail_of(extract_types: List[str], reception_status: str) -> Mail or None: From 1822cacc71c988a02141676bbb428bb3b9c6a9a1 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 14:29:06 +0000 Subject: [PATCH 17/38] Split out getting unread messages into separate function --- mail/libraries/mailbox_service.py | 46 +------- mailboxes/tests/test_utils.py | 186 ++++++++++++++++++++++++++++++ mailboxes/utils.py | 28 +++++ 3 files changed, 220 insertions(+), 40 deletions(-) diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index e4dd82cd..4b30af67 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -9,53 +9,19 @@ from mail.models import Mail from mailboxes.enums import MailReadStatuses from mailboxes.models import MailboxConfig -from mailboxes.utils import ( - get_message_header, - get_message_id, - get_message_number, - get_read_messages, - is_from_valid_sender, -) +from mailboxes.utils import get_unread_message_ids logger = logging.getLogger(__name__) def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[Tuple[EmailMessageDto, Callable]]: - mails: list - _, mails, _ = pop3_connection.list() mailbox_config, _ = MailboxConfig.objects.get_or_create(username=username) - incoming_email_check_limit = settings.INCOMING_EMAIL_CHECK_LIMIT - # Check only the emails specified in the setting - # Since we don't delete emails from these mailboxes the total number can be very high over a period of time - # and increases the processing time. - # The mails is a list of message number and size - message number is an increasing value so the - # latest emails will always be at the end. - mail_message_ids = [] - for m in mails[-incoming_email_check_limit:]: - msg_num = get_message_number(m) - message_header = get_message_header(pop3_connection, msg_num) - if not is_from_valid_sender(message_header, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): - logger.warning( - "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", - msg_num, - settings.SPIRE_FROM_ADDRESS, - settings.HMRC_TO_DIT_REPLY_ADDRESS, - ) - continue - message_id = get_message_id(message_header) - logger.info("Extracted Message-Id as %s for the message_num %s", message_id, msg_num) - mail_message_ids.append(message_id, msg_num) - - # these are mailbox message ids we've seen before - read_messages = get_read_messages(mailbox_config) - logger.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) - - for message_id, message_num in mail_message_ids: - if message_id in read_messages: - continue - - # only return messages we haven't seen before + for message_id, message_num in get_unread_message_ids( + pop3_connection, + mailbox_config, + settings.INCOMING_EMAIL_CHECK_LIMIT, + ): read_status, _ = mailbox_config.mail_read_statuses.get_or_create( message_id=message_id, message_num=message_num, diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index fe852df3..906ed236 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -2,6 +2,7 @@ from email.message import Message from unittest.mock import MagicMock +from django.conf import settings from django.test import SimpleTestCase, TestCase, override_settings from parameterized import parameterized @@ -12,6 +13,7 @@ get_message_id, get_message_number, get_read_messages, + get_unread_message_ids, is_from_valid_sender, ) @@ -126,3 +128,187 @@ def test_is_from_invalid_sender(self, valid_address): message["From"] = from_header self.assertFalse(is_from_valid_sender(message, [valid_address])) + + +class GetUnreadMessageIdsTests(TestCase): + def test_unread_message_ids(self): + pop3_connection = MagicMock() + mailbox = MailboxConfigFactory() + + pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + pop3_connection.top.side_effect = _top + + self.assertEqual( + list(get_unread_message_ids(pop3_connection, mailbox)), + [ + ("message-id-1", "1"), + ("message-id-2", "2"), + ("message-id-3", "3"), + ], + ) + + def test_unread_message_ids_skips_read_messages(self): + pop3_connection = MagicMock() + mailbox = MailboxConfigFactory() + + pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + pop3_connection.top.side_effect = _top + + MailReadStatusFactory( + mailbox=mailbox, + status=MailReadStatuses.READ, + message_id="message-id-1", + ) + MailReadStatusFactory( + mailbox=mailbox, + status=MailReadStatuses.UNPROCESSABLE, + message_id="message-id-3", + ) + + self.assertEqual( + list(get_unread_message_ids(pop3_connection, mailbox)), + [ + ("message-id-2", "2"), + ], + ) + + def test_unread_message_ids_skips_read_messages(self): + pop3_connection = MagicMock() + mailbox = MailboxConfigFactory() + + pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + pop3_connection.top.side_effect = _top + + MailReadStatusFactory( + mailbox=mailbox, + status=MailReadStatuses.READ, + message_id="message-id-1", + ) + MailReadStatusFactory( + mailbox=mailbox, + status=MailReadStatuses.UNPROCESSABLE, + message_id="message-id-3", + ) + + self.assertEqual( + list(get_unread_message_ids(pop3_connection, mailbox)), + [ + ("message-id-2", "2"), + ], + ) + + def test_unread_message_ids_skips_invalid_senders(self): + pop3_connection = MagicMock() + mailbox = MailboxConfigFactory() + + sender_map = { + "1": settings.SPIRE_FROM_ADDRESS, + "2": "invalid@example.com", # /PS-IGNORE + "3": settings.HMRC_TO_DIT_REPLY_ADDRESS, + } + + pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["From"] = Header(sender_map[which]) + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + pop3_connection.top.side_effect = _top + + self.assertEqual( + list(get_unread_message_ids(pop3_connection, mailbox)), + [ + ("message-id-1", "1"), + ("message-id-3", "3"), + ], + ) + + def test_unread_message_ids_checks_up_to_limit(self): + pop3_connection = MagicMock() + mailbox = MailboxConfigFactory() + + pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + pop3_connection.top.side_effect = _top + + self.assertEqual( + list(get_unread_message_ids(pop3_connection, mailbox, 2)), + [ + ("message-id-2", "2"), + ("message-id-3", "3"), + ], + ) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index f2a314f9..e66ab50a 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -1,4 +1,5 @@ import logging +import sys from email.headerregistry import Address from email.parser import BytesHeaderParser from email.utils import parseaddr @@ -44,6 +45,33 @@ def get_read_messages(mailbox_config): return read_message_ids +def get_unread_message_ids(pop3_connection, mailbox_config, incoming_email_check_limit=sys.maxsize): + read_messages = get_read_messages(mailbox_config) + logger.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) + + _, mails, _ = pop3_connection.list() + mails = mails[-incoming_email_check_limit:] + for mail in mails: + message_num = get_message_number(mail) + message_header = get_message_header(pop3_connection, message_num) + + if not is_from_valid_sender(message_header, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): + logger.warning( + "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", + message_num, + settings.SPIRE_FROM_ADDRESS, + settings.HMRC_TO_DIT_REPLY_ADDRESS, + ) + continue + + message_id = get_message_id(message_header) + logger.info("Extracted Message-Id as %s for the message_num %s", message_id, message_num) + if message_id in read_messages: + continue + + yield message_id, message_num + + def is_from_valid_sender(msg_header, valid_addresses): _, from_address = parseaddr(str(msg_header["From"])) valid_addresses = [address.replace("From: ", "") for address in valid_addresses] From e2c2b8dddd1b8f8b6016a8bb8e736acde71b2883 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 18:44:34 +0000 Subject: [PATCH 18/38] Move out message iterator function --- mail/libraries/helpers.py | 3 - mail/libraries/mailbox_service.py | 75 +---------- mail/libraries/routing_controller.py | 13 +- mailboxes/tests/test_utils.py | 190 ++++++++++++++++++++++++++- mailboxes/utils.py | 72 ++++++++++ 5 files changed, 269 insertions(+), 84 deletions(-) diff --git a/mail/libraries/helpers.py b/mail/libraries/helpers.py index c6e778e5..c1a28f9a 100644 --- a/mail/libraries/helpers.py +++ b/mail/libraries/helpers.py @@ -1,6 +1,5 @@ import json import logging - from email.message import Message from email.parser import Parser from json.decoder import JSONDecodeError @@ -14,7 +13,6 @@ from mail.libraries.email_message_dto import EmailMessageDto, HmrcEmailMessageDto from mail.models import GoodIdMapping, LicenceData, LicenceIdMapping, Mail, UsageData - ALLOWED_FILE_MIMETYPES = ["application/octet-stream", "text/plain"] @@ -117,7 +115,6 @@ def get_run_number(patterned_text: str) -> int: Gets run-number from a patterned text: abc_xyz_nnn_yyy_1234_datetime. :returns found number; ValueError if it not found or is not a number """ - if patterned_text is None: raise ValueError("None received") diff --git a/mail/libraries/mailbox_service.py b/mail/libraries/mailbox_service.py index 4b30af67..df62218f 100644 --- a/mail/libraries/mailbox_service.py +++ b/mail/libraries/mailbox_service.py @@ -1,83 +1,12 @@ import logging -from poplib import POP3_SSL, error_proto -from typing import Callable, Iterator, List, Tuple +from typing import List, Optional -from django.conf import settings - -from mail.libraries.email_message_dto import EmailMessageDto -from mail.libraries.helpers import to_mail_message_dto from mail.models import Mail -from mailboxes.enums import MailReadStatuses -from mailboxes.models import MailboxConfig -from mailboxes.utils import get_unread_message_ids logger = logging.getLogger(__name__) -def get_message_iterator(pop3_connection: POP3_SSL, username: str) -> Iterator[Tuple[EmailMessageDto, Callable]]: - mailbox_config, _ = MailboxConfig.objects.get_or_create(username=username) - - for message_id, message_num in get_unread_message_ids( - pop3_connection, - mailbox_config, - settings.INCOMING_EMAIL_CHECK_LIMIT, - ): - read_status, _ = mailbox_config.mail_read_statuses.get_or_create( - message_id=message_id, - message_num=message_num, - ) - - def mark_status(status): - """ - :param status: A choice from `MailReadStatuses.choices` - """ - logger.info( - "Marking message_id %s with message_num %s from %s as %s", - message_id, - message_num, - read_status.mailbox, - status, - ) - read_status.status = status - read_status.save() - - try: - m = pop3_connection.retr(message_num) - logger.info( - "Retrieved message_id %s with message_num %s from %s", - message_id, - message_num, - read_status.mailbox, - ) - except error_proto as err: - logger.error( - "Unable to RETR message num %s with Message-ID %s in %s: %s", - message_num, - message_id, - mailbox_config, - err, - exc_info=True, - ) - continue - - try: - mail_message = to_mail_message_dto(m) - except ValueError as ve: - logger.error( - "Unable to convert message num %s with Message-Id %s to DTO in %s: %s", - message_num, - message_id, - mailbox_config, - ve, - exc_info=True, - ) - mark_status(MailReadStatuses.UNPROCESSABLE) - continue - - yield mail_message, mark_status - - -def find_mail_of(extract_types: List[str], reception_status: str) -> Mail or None: +def find_mail_of(extract_types: List[str], reception_status: str) -> Optional[Mail]: try: mail = Mail.objects.get(status=reception_status, extract_type__in=extract_types) except Mail.DoesNotExist: diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index f017c631..4567f140 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -20,11 +20,11 @@ select_email_for_sending, sort_dtos_by_date, ) -from mail.libraries.mailbox_service import get_message_iterator from mail.models import Mail from mail_servers.servers import MailServer, smtp_send from mail_servers.utils import get_mail_server from mailboxes.enums import MailReadStatuses +from mailboxes.utils import get_message_iterator logger = logging.getLogger(__name__) @@ -184,12 +184,11 @@ def _collect_and_send(mail: Mail): def get_email_message_dtos(server: MailServer, number: Optional[int] = 3) -> List[Tuple[EmailMessageDto, Callable]]: - with server.connect_to_pop3() as pop3_connection: - emails_iter = get_message_iterator(pop3_connection, server.user) - if number: - emails = list(islice(emails_iter, number)) - else: - emails = list(emails_iter) + emails_iter = get_message_iterator(server) + if number: + emails = list(islice(emails_iter, number)) + else: + emails = list(emails_iter) return emails diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 906ed236..9d116da0 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -1,16 +1,23 @@ +import datetime from email.header import Header from email.message import Message -from unittest.mock import MagicMock +from poplib import error_proto +from unittest.mock import MagicMock, patch +from dateutil.tz import tzlocal from django.conf import settings from django.test import SimpleTestCase, TestCase, override_settings from parameterized import parameterized +from mail.libraries.email_message_dto import EmailMessageDto +from mail_servers.servers import MailServer from mailboxes.enums import MailReadStatuses +from mailboxes.models import MailboxConfig from mailboxes.tests.factories import MailboxConfigFactory, MailReadStatusFactory from mailboxes.utils import ( get_message_header, get_message_id, + get_message_iterator, get_message_number, get_read_messages, get_unread_message_ids, @@ -312,3 +319,184 @@ def _top(which, howmuch): ("message-id-3", "3"), ], ) + + +class GetMessageIteratorTests(TestCase): + def test_get_message_iterator(self): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).username = "test@example.com" # /PS-IGNORE + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + def _retr(which): + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + + mock_pop3_connection.retr.side_effect = _retr + + iterator = get_message_iterator(mail_server) + dtos, funcs = zip(*iterator) + + self.assertEqual( + list(dtos), + [ + EmailMessageDto( + run_number=1, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_1_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_1_datetime', b''], 143)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=2, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_2_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=3, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_3_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_3_datetime', b''], 143)", # /PS-IGNORE + ), + ], + ) + + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [ + "", + "", + "", + ], + transform=repr, + ) + + funcs[1](MailReadStatuses.READ) + funcs[2](MailReadStatuses.UNPROCESSABLE) + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [ + "", + "", + "", + ], + transform=repr, + ) + + def test_get_message_iterator_retr_failure(self): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).username = "test@example.com" # /PS-IGNORE + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [b"1 11111"], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + mock_pop3_connection.retr.side_effect = error_proto() + + iterator = get_message_iterator(mail_server) + self.assertEqual(list(iterator), []) + + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [""], # /PS-IGNORE + transform=repr, + ) + + @patch("mailboxes.utils.to_mail_message_dto") + def test_get_message_iterator_message_dto_failure(self, mock_to_mail_message_dto): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).username = "test@example.com" # /PS-IGNORE + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [b"1 11111"], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + mock_pop3_connection.retr.return_value = "UNCOVERTABLE" + + mock_to_mail_message_dto.side_effect = ValueError() + + iterator = get_message_iterator(mail_server) + self.assertEqual(list(iterator), []) + + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [""], # /PS-IGNORE + transform=repr, + ) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index e66ab50a..0ca65e5e 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -3,10 +3,16 @@ from email.headerregistry import Address from email.parser import BytesHeaderParser from email.utils import parseaddr +from poplib import error_proto +from typing import Callable, Iterator, Tuple from django.conf import settings +from mail.libraries.email_message_dto import EmailMessageDto +from mail.libraries.helpers import to_mail_message_dto +from mail_servers.servers import MailServer from mailboxes.enums import MailReadStatuses +from mailboxes.models import MailboxConfig logger = logging.getLogger(__name__) @@ -77,3 +83,69 @@ def is_from_valid_sender(msg_header, valid_addresses): valid_addresses = [address.replace("From: ", "") for address in valid_addresses] return from_address in valid_addresses + + +class MarkStatus: + def __init__(self, message_id, message_num, read_status): + self.message_id = message_id + self.message_num = message_num + self.read_status = read_status + + def __call__(self, status): + logger.info( + "Marking message_id %s with message_num %s from %r as %s", + self.message_id, + self.message_num, + self.read_status.mailbox, + status, + ) + self.read_status.status = status + self.read_status.save() + + +def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, Callable]]: + mailbox_config, _ = MailboxConfig.objects.get_or_create(username=server.username) + + with server.connect_to_pop3() as pop3_connection: + for message_id, message_num in get_unread_message_ids( + pop3_connection, + mailbox_config, + settings.INCOMING_EMAIL_CHECK_LIMIT, + ): + read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + message_id=message_id, + message_num=message_num, + ) + + mark_status = MarkStatus(message_id, message_num, read_status) + + try: + m = pop3_connection.retr(message_num) + logger.info( + "Retrieved message_id %s with message_num %s from %s", + message_id, + message_num, + read_status.mailbox, + ) + except error_proto: + logger.exception( + "Unable to RETR message num %s with Message-ID %s in %r", + message_num, + message_id, + mailbox_config, + ) + continue + + try: + mail_message = to_mail_message_dto(m) + except ValueError: + mark_status(MailReadStatuses.UNPROCESSABLE) + logger.exception( + "Unable to convert message num %s with Message-Id %s to DTO in %r", + message_num, + message_id, + mailbox_config, + ) + continue + + yield mail_message, mark_status From d46f598d3cd337ee809379da070020ff54410eeb Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 20:40:12 +0000 Subject: [PATCH 19/38] Rewrite getting message iterator --- mail/tests/test_dtos.py | 12 +- mailboxes/tests/test_utils.py | 458 +++++++++++++++++++++++----------- mailboxes/utils.py | 182 ++++++++------ 3 files changed, 410 insertions(+), 242 deletions(-) diff --git a/mail/tests/test_dtos.py b/mail/tests/test_dtos.py index 4f9bc3bb..97a4a9c5 100644 --- a/mail/tests/test_dtos.py +++ b/mail/tests/test_dtos.py @@ -1,4 +1,3 @@ -from contextlib import contextmanager from unittest.mock import Mock, patch from dateutil.parser import parse @@ -91,20 +90,11 @@ class TestGetEmailMessagesDTOs(SimpleTestCase): @patch("mail.libraries.routing_controller.get_message_iterator") def test_get_email_message_dtos(self, mock_get_message_iterator): mock_mail_server = Mock(spec=MailServer) - mock_connection = Mock() - @contextmanager - def _mock_connect_to_pop3(): - yield mock_connection - - mock_mail_server.connect_to_pop3.side_effect = _mock_connect_to_pop3 mock_emails = [Mock(), Mock()] mock_get_message_iterator.return_value = mock_emails emails = get_email_message_dtos(mock_mail_server) self.assertEqual(emails, mock_emails) - mock_get_message_iterator.assert_called_once_with( - mock_connection, - mock_mail_server.user, - ) + mock_get_message_iterator.assert_called_once_with(mock_mail_server) diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 9d116da0..7548e96c 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -15,12 +15,13 @@ from mailboxes.models import MailboxConfig from mailboxes.tests.factories import MailboxConfigFactory, MailReadStatusFactory from mailboxes.utils import ( + MailboxMessage, + MarkStatus, get_message_header, get_message_id, get_message_iterator, get_message_number, get_read_messages, - get_unread_message_ids, is_from_valid_sender, ) @@ -105,13 +106,11 @@ def test_get_read_messages(self): ) -class IsFromValidSenderTests(SimpleTestCase): +class IsFromValidSenderTests(TestCase): @parameterized.expand( [ - "From: spire.from.address@example.com", # /PS-IGNORE - "From: hmrc.to.dit.reply.address@example.com", # /PS-IGNORE - "spire.from.address@example.com", # /PS-IGNORE - "hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + "From: valid@example.com", # /PS-IGNORE + "valid@example.com", # /PS-IGNORE ] ) def test_is_from_valid_sender(self, valid_address): @@ -119,14 +118,22 @@ def test_is_from_valid_sender(self, valid_address): from_header = Header(f"Sender <{valid_address.replace('From: ', '')}>") # /PS-IGNORE message["From"] = from_header + pop3_connection = MagicMock() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() + + mailbox_config = MailboxConfigFactory() + + message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", + ) self.assertTrue(is_from_valid_sender(message, [valid_address])) @parameterized.expand( [ - "From: spire.from.address@example.com", # /PS-IGNORE - "From: hmrc.to.dit.reply.address@example.com", # /PS-IGNORE - "spire.from.address@example.com", # /PS-IGNORE - "hmrc.to.dit.reply.address@example.com", # /PS-IGNORE + "From: valid@example.com", # /PS-IGNORE + "valid@example.com", # /PS-IGNORE ] ) def test_is_from_invalid_sender(self, valid_address): @@ -134,191 +141,193 @@ def test_is_from_invalid_sender(self, valid_address): from_header = Header("Invalid ") # /PS-IGNORE message["From"] = from_header - self.assertFalse(is_from_valid_sender(message, [valid_address])) - - -class GetUnreadMessageIdsTests(TestCase): - def test_unread_message_ids(self): pop3_connection = MagicMock() - mailbox = MailboxConfigFactory() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() - pop3_connection.list.return_value = ( - MagicMock(), - [ - b"1 11111", - b"2 22222", - b"3 33333", - ], - MagicMock(), + mailbox_config = MailboxConfigFactory() + + message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", ) - def _top(which, howmuch): - self.assertEqual(howmuch, 0) - msg = Message() - msg["Message-Id"] = Header(f"") # /PS-IGNORE - msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + self.assertFalse(is_from_valid_sender(message, [valid_address])) - pop3_connection.top.side_effect = _top - self.assertEqual( - list(get_unread_message_ids(pop3_connection, mailbox)), - [ - ("message-id-1", "1"), - ("message-id-2", "2"), - ("message-id-3", "3"), - ], - ) +class MarkStatusTests(TestCase): + def test_mark_status_read_status_without_initial_object(self): + mailbox_config = MailboxConfigFactory() + + message = Message() + message["Message-Id"] = Header("<12345@example.com>") # /PS-IGNORE - def test_unread_message_ids_skips_read_messages(self): pop3_connection = MagicMock() - mailbox = MailboxConfigFactory() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() - pop3_connection.list.return_value = ( - MagicMock(), - [ - b"1 11111", - b"2 22222", - b"3 33333", - ], - MagicMock(), + mailbox_message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", ) - def _top(which, howmuch): - self.assertEqual(howmuch, 0) - msg = Message() - msg["Message-Id"] = Header(f"") # /PS-IGNORE - msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() - - pop3_connection.top.side_effect = _top - - MailReadStatusFactory( - mailbox=mailbox, - status=MailReadStatuses.READ, - message_id="message-id-1", + self.assertEqual(mailbox_config.mail_read_statuses.count(), 0) + mark_status = MarkStatus(mailbox_config, mailbox_message) + mail_read_status = mailbox_config.mail_read_statuses.get() + self.assertEqual( + mail_read_status.status, + MailReadStatuses.UNREAD, ) - MailReadStatusFactory( - mailbox=mailbox, - status=MailReadStatuses.UNPROCESSABLE, - message_id="message-id-3", + self.assertEqual( + mail_read_status.message_id, + mailbox_message.message_id, + ) + self.assertEqual( + mail_read_status.message_num, + mailbox_message.message_number, ) + mark_status(MailReadStatuses.READ) + mail_read_status.refresh_from_db() self.assertEqual( - list(get_unread_message_ids(pop3_connection, mailbox)), - [ - ("message-id-2", "2"), - ], + mail_read_status.status, + MailReadStatuses.READ, ) - def test_unread_message_ids_skips_read_messages(self): - pop3_connection = MagicMock() - mailbox = MailboxConfigFactory() + def test_mark_status_read_status_with_initial_object(self): + mailbox_config = MailboxConfigFactory() - pop3_connection.list.return_value = ( - MagicMock(), - [ - b"1 11111", - b"2 22222", - b"3 33333", - ], - MagicMock(), - ) + message = Message() + message["Message-Id"] = Header("<12345@example.com>") # /PS-IGNORE - def _top(which, howmuch): - self.assertEqual(howmuch, 0) - msg = Message() - msg["Message-Id"] = Header(f"") # /PS-IGNORE - msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + pop3_connection = MagicMock() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() - pop3_connection.top.side_effect = _top + mailbox_message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", + ) - MailReadStatusFactory( - mailbox=mailbox, - status=MailReadStatuses.READ, - message_id="message-id-1", + mailbox_config.mail_read_statuses.create( + message_id="12345", + message_num="1", ) - MailReadStatusFactory( - mailbox=mailbox, - status=MailReadStatuses.UNPROCESSABLE, - message_id="message-id-3", + mark_status = MarkStatus(mailbox_config, mailbox_message) + mail_read_status = mailbox_config.mail_read_statuses.get() + self.assertEqual( + mail_read_status.status, + MailReadStatuses.UNREAD, + ) + self.assertEqual( + mail_read_status.message_id, + mailbox_message.message_id, + ) + self.assertEqual( + mail_read_status.message_num, + mailbox_message.message_number, ) + mark_status(MailReadStatuses.READ) + mail_read_status.refresh_from_db() self.assertEqual( - list(get_unread_message_ids(pop3_connection, mailbox)), - [ - ("message-id-2", "2"), - ], + mail_read_status.status, + MailReadStatuses.READ, ) - def test_unread_message_ids_skips_invalid_senders(self): + +class MailboxMessageTests(TestCase): + def test_message_header(self): + msg = Message() + header = Header("value") + msg["header"] = header + pop3_connection = MagicMock() - mailbox = MailboxConfigFactory() + pop3_connection.top.return_value = MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() + mailbox_config = MailboxConfigFactory() + message_number = "1" - sender_map = { - "1": settings.SPIRE_FROM_ADDRESS, - "2": "invalid@example.com", # /PS-IGNORE - "3": settings.HMRC_TO_DIT_REPLY_ADDRESS, - } + mailbox_message = MailboxMessage(pop3_connection, mailbox_config, message_number) + pop3_connection.top.assert_not_called() - pop3_connection.list.return_value = ( - MagicMock(), - [ - b"1 11111", - b"2 22222", - b"3 33333", - ], - MagicMock(), - ) + self.assertIsInstance(mailbox_message.message_header, Message) + self.assertEqual(mailbox_message.message_header["header"], "value") + pop3_connection.top.assert_called_once_with(message_number, 0) - def _top(which, howmuch): - self.assertEqual(howmuch, 0) - msg = Message() - msg["Message-Id"] = Header(f"") # /PS-IGNORE - msg["From"] = Header(sender_map[which]) - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + msg = Message() + header = Header("a different value") + msg["header"] = header + + pop3_connection.top.return_value = MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() - pop3_connection.top.side_effect = _top + self.assertIsInstance(mailbox_message.message_header, Message) + self.assertEqual(mailbox_message.message_header["header"], "value") + pop3_connection.top.assert_called_once_with(message_number, 0) + def test_message_id(self): + mailbox_config = MailboxConfigFactory() + + message = Message() + message["Message-Id"] = Header("<12345@example.com>") # /PS-IGNORE + + pop3_connection = MagicMock() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() + + mailbox_message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", + ) self.assertEqual( - list(get_unread_message_ids(pop3_connection, mailbox)), - [ - ("message-id-1", "1"), - ("message-id-3", "3"), - ], + mailbox_message.message_id, + "12345", ) + pop3_connection.top.assert_called_once_with("1", 0) - def test_unread_message_ids_checks_up_to_limit(self): - pop3_connection = MagicMock() - mailbox = MailboxConfigFactory() + message = Message() + message["Message-Id"] = Header("<67890@example.com>") # /PS-IGNORE - pop3_connection.list.return_value = ( - MagicMock(), - [ - b"1 11111", - b"2 22222", - b"3 33333", - ], - MagicMock(), + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() + self.assertEqual( + mailbox_message.message_id, + "12345", ) + pop3_connection.top.assert_called_once_with("1", 0) - def _top(which, howmuch): - self.assertEqual(howmuch, 0) - msg = Message() - msg["Message-Id"] = Header(f"") # /PS-IGNORE - msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + def test_mail_data(self): + mailbox_config = MailboxConfigFactory() - pop3_connection.top.side_effect = _top + message = Message() + message["Message-Id"] = Header("<12345@example.com>") # /PS-IGNORE + pop3_connection = MagicMock() + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() + mail_data = MagicMock() + pop3_connection.retr.return_value = mail_data + + mailbox_message = MailboxMessage( + pop3_connection, + mailbox_config, + "1", + ) self.assertEqual( - list(get_unread_message_ids(pop3_connection, mailbox, 2)), - [ - ("message-id-2", "2"), - ("message-id-3", "3"), - ], + mailbox_message.mail_data, + mail_data, ) + pop3_connection.retr.assert_called_with("1") + + message = Message() + message["Message-Id"] = Header("<67890@example.com>") # /PS-IGNORE + + pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() + different_mail_data = MagicMock() + pop3_connection.retr.return_value = different_mail_data + + self.assertEqual( + mailbox_message.mail_data, + mail_data, + ) + pop3_connection.retr.assert_called_with("1") class GetMessageIteratorTests(TestCase): @@ -424,6 +433,151 @@ def _retr(which): transform=repr, ) + def test_get_message_iterator_invalid_senders(self): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).username = "test@example.com" # /PS-IGNORE + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + lookup_from = { + "1": "invalid@example.com", # /PS-IGNORE + "2": settings.SPIRE_FROM_ADDRESS, + "3": "invalid@example.com", # /PS-IGNORE + } + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(lookup_from[which]) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + def _retr(which): + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(lookup_from[which]) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + + mock_pop3_connection.retr.side_effect = _retr + + iterator = get_message_iterator(mail_server) + dtos, _ = zip(*iterator) + + self.assertEqual( + list(dtos), + [ + EmailMessageDto( + run_number=2, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_2_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + ), + ], + ) + + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [ + "", + ], + transform=repr, + ) + + def test_get_message_iterator_already_read(self): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).username = "test@example.com" # /PS-IGNORE + + mailbox_config = MailboxConfigFactory(username="test@example.com") # /PS-IGNORE + mailbox_config.mail_read_statuses.create( + message_num=1, + message_id="message-id-1", + status=MailReadStatuses.READ, + ) + mailbox_config.mail_read_statuses.create( + message_num=3, + message_id="message-id-3", + status=MailReadStatuses.UNPROCESSABLE, + ) + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + def _retr(which): + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + + mock_pop3_connection.retr.side_effect = _retr + + iterator = get_message_iterator(mail_server) + dtos, _ = zip(*iterator) + + self.assertEqual( + list(dtos), + [ + EmailMessageDto( + run_number=2, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_2_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + ), + ], + ) + def test_get_message_iterator_retr_failure(self): self.assertEqual(MailboxConfig.objects.count(), 0) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 0ca65e5e..7d86a1e1 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -1,5 +1,4 @@ import logging -import sys from email.headerregistry import Address from email.parser import BytesHeaderParser from email.utils import parseaddr @@ -7,6 +6,7 @@ from typing import Callable, Iterator, Tuple from django.conf import settings +from django.utils.functional import cached_property from mail.libraries.email_message_dto import EmailMessageDto from mail.libraries.helpers import to_mail_message_dto @@ -17,12 +17,6 @@ logger = logging.getLogger(__name__) -def get_message_number(listing_message): - listing_msg = listing_message.decode(settings.DEFAULT_ENCODING) - msg_num, _, _ = listing_msg.partition(" ") - return msg_num - - def get_message_header(pop3_connection, msg_num): # retrieves the header information # 0 indicates the number of lines of message to be retrieved after the header @@ -40,6 +34,12 @@ def get_message_id(msg_header): return message_id +def get_message_number(listing_message): + listing_msg = listing_message.decode(settings.DEFAULT_ENCODING) + msg_num, _, _ = listing_msg.partition(" ") + return msg_num + + def get_read_messages(mailbox_config): mail_read_statuses = mailbox_config.mail_read_statuses.filter( status__in=[ @@ -51,51 +51,26 @@ def get_read_messages(mailbox_config): return read_message_ids -def get_unread_message_ids(pop3_connection, mailbox_config, incoming_email_check_limit=sys.maxsize): - read_messages = get_read_messages(mailbox_config) - logger.info("Number of messages READ/UNPROCESSABLE in %s are %s", mailbox_config.username, len(read_messages)) - - _, mails, _ = pop3_connection.list() - mails = mails[-incoming_email_check_limit:] - for mail in mails: - message_num = get_message_number(mail) - message_header = get_message_header(pop3_connection, message_num) - - if not is_from_valid_sender(message_header, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): - logger.warning( - "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", - message_num, - settings.SPIRE_FROM_ADDRESS, - settings.HMRC_TO_DIT_REPLY_ADDRESS, - ) - continue - - message_id = get_message_id(message_header) - logger.info("Extracted Message-Id as %s for the message_num %s", message_id, message_num) - if message_id in read_messages: - continue - - yield message_id, message_num - - -def is_from_valid_sender(msg_header, valid_addresses): - _, from_address = parseaddr(str(msg_header["From"])) +def is_from_valid_sender(message, valid_addresses): + _, from_address = parseaddr(str(message.message_header["From"])) valid_addresses = [address.replace("From: ", "") for address in valid_addresses] return from_address in valid_addresses class MarkStatus: - def __init__(self, message_id, message_num, read_status): - self.message_id = message_id - self.message_num = message_num - self.read_status = read_status + def __init__(self, mailbox_config, message): + self.message = message + self.read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + message_id=message.message_id, + message_num=message.message_number, + ) def __call__(self, status): logger.info( "Marking message_id %s with message_num %s from %r as %s", - self.message_id, - self.message_num, + self.message.message_id, + self.message.message_number, self.read_status.mailbox, status, ) @@ -103,49 +78,98 @@ def __call__(self, status): self.read_status.save() +class MailboxMessage: + def __init__(self, pop3_connection, mailbox_config, message_number): + self.pop3_connection = pop3_connection + self.mailbox_config = mailbox_config + self.message_number = message_number + + @cached_property + def message_header(self): + return get_message_header(self.pop3_connection, self.message_number) + + @cached_property + def message_id(self): + message_id = get_message_id(self.message_header) + logger.info("Extracted Message-Id as %s for the message_num %s", message_id, self.message_number) + return message_id + + @cached_property + def mail_data(self): + return self.pop3_connection.retr(self.message_number) + + +def get_messages(pop3_connection, mailbox_config, max_limit): + _, mails, _ = pop3_connection.list() + mails = mails[-max_limit:] + for mail in mails: + message_number = get_message_number(mail) + message = MailboxMessage(pop3_connection, mailbox_config, message_number) + yield message + + +def is_read(message, read_messages): + return message.message_id in read_messages + + +def get_message_dto(message): + mail_data = message.mail_data + logger.info( + "Retrieved message_id %s with message_num %s from %s", + message.message_id, + message.message_number, + message.mailbox_config, + ) + + mail_message = to_mail_message_dto(mail_data) + + return mail_message + + def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, Callable]]: mailbox_config, _ = MailboxConfig.objects.get_or_create(username=server.username) + read_messages = get_read_messages(mailbox_config) with server.connect_to_pop3() as pop3_connection: - for message_id, message_num in get_unread_message_ids( + messages = get_messages( pop3_connection, mailbox_config, settings.INCOMING_EMAIL_CHECK_LIMIT, - ): - read_status, _ = mailbox_config.mail_read_statuses.get_or_create( - message_id=message_id, - message_num=message_num, + ) + + for message in messages: + if not is_from_valid_sender(message, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): + logger.warning( + "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", + message.message_number, + settings.SPIRE_FROM_ADDRESS, + settings.HMRC_TO_DIT_REPLY_ADDRESS, ) + continue + + if is_read(message, read_messages): + continue + + mark_status = MarkStatus(mailbox_config, message) + + try: + message_dto = get_message_dto(message) + except error_proto: + logger.exception( + "Unable to RETR message num %s with Message-ID %s in %r", + message.message_number, + message.message_id, + message.mailbox_config, + ) + continue + except ValueError: + mark_status(MailReadStatuses.UNPROCESSABLE) + logger.exception( + "Unable to convert message num %s with Message-Id %s to DTO in %r", + message.message_number, + message.message_id, + message.mailbox_config, + ) + continue - mark_status = MarkStatus(message_id, message_num, read_status) - - try: - m = pop3_connection.retr(message_num) - logger.info( - "Retrieved message_id %s with message_num %s from %s", - message_id, - message_num, - read_status.mailbox, - ) - except error_proto: - logger.exception( - "Unable to RETR message num %s with Message-ID %s in %r", - message_num, - message_id, - mailbox_config, - ) - continue - - try: - mail_message = to_mail_message_dto(m) - except ValueError: - mark_status(MailReadStatuses.UNPROCESSABLE) - logger.exception( - "Unable to convert message num %s with Message-Id %s to DTO in %r", - message_num, - message_id, - mailbox_config, - ) - continue - - yield mail_message, mark_status + yield message_dto, mark_status From fb747f262d773edcc7795b1617169ca18264f1e9 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Mon, 23 Dec 2024 21:41:41 +0000 Subject: [PATCH 20/38] Save email data to object --- .../0003_mailreadstatus_mail_data.py | 18 +++++ mailboxes/models.py | 1 + mailboxes/tests/test_utils.py | 68 +++---------------- mailboxes/utils.py | 26 ++++--- 4 files changed, 48 insertions(+), 65 deletions(-) create mode 100644 mailboxes/migrations/0003_mailreadstatus_mail_data.py diff --git a/mailboxes/migrations/0003_mailreadstatus_mail_data.py b/mailboxes/migrations/0003_mailreadstatus_mail_data.py new file mode 100644 index 00000000..86bf8bd1 --- /dev/null +++ b/mailboxes/migrations/0003_mailreadstatus_mail_data.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.16 on 2024-12-23 21:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("mailboxes", "0002_alter_mailreadstatus_mailbox"), + ] + + operations = [ + migrations.AddField( + model_name="mailreadstatus", + name="mail_data", + field=models.BinaryField(null=True), + ), + ] diff --git a/mailboxes/models.py b/mailboxes/models.py index 7ee9d8f0..3e87e2f7 100644 --- a/mailboxes/models.py +++ b/mailboxes/models.py @@ -35,6 +35,7 @@ class MailReadStatus(TimeStampedModel): on_delete=models.CASCADE, related_name="mail_read_statuses", ) + mail_data = models.BinaryField(null=True) class Meta: db_table = ( diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 7548e96c..adf422a9 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -156,7 +156,7 @@ def test_is_from_invalid_sender(self, valid_address): class MarkStatusTests(TestCase): - def test_mark_status_read_status_without_initial_object(self): + def test_mark_status_read_status(self): mailbox_config = MailboxConfigFactory() message = Message() @@ -171,63 +171,13 @@ def test_mark_status_read_status_without_initial_object(self): "1", ) - self.assertEqual(mailbox_config.mail_read_statuses.count(), 0) - mark_status = MarkStatus(mailbox_config, mailbox_message) - mail_read_status = mailbox_config.mail_read_statuses.get() - self.assertEqual( - mail_read_status.status, - MailReadStatuses.UNREAD, - ) - self.assertEqual( - mail_read_status.message_id, - mailbox_message.message_id, - ) - self.assertEqual( - mail_read_status.message_num, - mailbox_message.message_number, - ) - - mark_status(MailReadStatuses.READ) - mail_read_status.refresh_from_db() - self.assertEqual( - mail_read_status.status, - MailReadStatuses.READ, - ) - - def test_mark_status_read_status_with_initial_object(self): - mailbox_config = MailboxConfigFactory() - - message = Message() - message["Message-Id"] = Header("<12345@example.com>") # /PS-IGNORE - - pop3_connection = MagicMock() - pop3_connection.top.return_value = MagicMock(), message.as_bytes().split(b"\n"), MagicMock() - - mailbox_message = MailboxMessage( - pop3_connection, - mailbox_config, - "1", - ) - - mailbox_config.mail_read_statuses.create( - message_id="12345", + mail_read_status = mailbox_config.mail_read_statuses.create( message_num="1", - ) - mark_status = MarkStatus(mailbox_config, mailbox_message) - mail_read_status = mailbox_config.mail_read_statuses.get() - self.assertEqual( - mail_read_status.status, - MailReadStatuses.UNREAD, - ) - self.assertEqual( - mail_read_status.message_id, - mailbox_message.message_id, - ) - self.assertEqual( - mail_read_status.message_num, - mailbox_message.message_number, + message_id="12345", + status=MailReadStatuses.UNREAD, ) + mark_status = MarkStatus(mailbox_message, mail_read_status) mark_status(MailReadStatuses.READ) mail_read_status.refresh_from_db() self.assertEqual( @@ -432,6 +382,10 @@ def _retr(which): ], transform=repr, ) + self.assertEqual( + bytes(mailbox.mail_read_statuses.all()[0].mail_data), + b"Message-Id: \nTo: to@example.com\nFrom: spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_1_datetime", # /PS-IGNORE + ) def test_get_message_iterator_invalid_senders(self): self.assertEqual(MailboxConfig.objects.count(), 0) @@ -611,7 +565,7 @@ def _top(which, howmuch): mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE self.assertQuerySetEqual( mailbox.mail_read_statuses.order_by("message_id"), - [""], # /PS-IGNORE + [], transform=repr, ) @@ -641,7 +595,7 @@ def _top(which, howmuch): mock_pop3_connection.top.side_effect = _top - mock_pop3_connection.retr.return_value = "UNCOVERTABLE" + mock_pop3_connection.retr.return_value = MagicMock(), [b"UNCONVERTABLE"], MagicMock() mock_to_mail_message_dto.side_effect = ValueError() diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 7d86a1e1..2fb53668 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -59,12 +59,9 @@ def is_from_valid_sender(message, valid_addresses): class MarkStatus: - def __init__(self, mailbox_config, message): + def __init__(self, message, read_status): self.message = message - self.read_status, _ = mailbox_config.mail_read_statuses.get_or_create( - message_id=message.message_id, - message_num=message.message_number, - ) + self.read_status = read_status def __call__(self, status): logger.info( @@ -98,6 +95,10 @@ def message_id(self): def mail_data(self): return self.pop3_connection.retr(self.message_number) + @cached_property + def binary_data(self): + return b"".join(self.mail_data[1]) + def get_messages(pop3_connection, mailbox_config, max_limit): _, mails, _ = pop3_connection.list() @@ -150,10 +151,8 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, if is_read(message, read_messages): continue - mark_status = MarkStatus(mailbox_config, message) - try: - message_dto = get_message_dto(message) + mail_data = message.binary_data except error_proto: logger.exception( "Unable to RETR message num %s with Message-ID %s in %r", @@ -162,6 +161,17 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, message.mailbox_config, ) continue + + read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + message_id=message.message_id, + message_num=message.message_number, + mail_data=mail_data, + ) + + mark_status = MarkStatus(message, read_status) + + try: + message_dto = get_message_dto(message) except ValueError: mark_status(MailReadStatuses.UNPROCESSABLE) logger.exception( From 8a2a4e966182ae2ef1acd3828ebf869f6d768138 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 11:59:57 +0000 Subject: [PATCH 21/38] Ensure message iterator runs with active pop connection --- mail_servers/servers.py | 1 + mailboxes/utils.py | 88 ++++++++++++++++++++--------------------- 2 files changed, 45 insertions(+), 44 deletions(-) diff --git a/mail_servers/servers.py b/mail_servers/servers.py index eef04833..b7d451a1 100644 --- a/mail_servers/servers.py +++ b/mail_servers/servers.py @@ -36,6 +36,7 @@ def connect_to_pop3(self): self.auth.authenticate(pop3_connection) yield pop3_connection finally: + logger.info("Disconnecting pop3 connection") pop3_connection.quit() @property diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 2fb53668..f2bd8420 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -128,7 +128,7 @@ def get_message_dto(message): def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, Callable]]: - mailbox_config, _ = MailboxConfig.objects.get_or_create(username=server.username) + mailbox_config, _ = MailboxConfig.objects.get_or_create(username=server.user) read_messages = get_read_messages(mailbox_config) with server.connect_to_pop3() as pop3_connection: @@ -138,48 +138,48 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, settings.INCOMING_EMAIL_CHECK_LIMIT, ) - for message in messages: - if not is_from_valid_sender(message, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): - logger.warning( - "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", - message.message_number, - settings.SPIRE_FROM_ADDRESS, - settings.HMRC_TO_DIT_REPLY_ADDRESS, + for message in messages: + if not is_from_valid_sender(message, [settings.SPIRE_FROM_ADDRESS, settings.HMRC_TO_DIT_REPLY_ADDRESS]): + logger.warning( + "Found mail with message_num %s that is not from SPIRE (%s) or HMRC (%s), skipping ...", + message.message_number, + settings.SPIRE_FROM_ADDRESS, + settings.HMRC_TO_DIT_REPLY_ADDRESS, + ) + continue + + if is_read(message, read_messages): + continue + + try: + mail_data = message.binary_data + except error_proto: + logger.exception( + "Unable to RETR message num %s with Message-ID %s in %r", + message.message_number, + message.message_id, + message.mailbox_config, + ) + continue + + read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + message_id=message.message_id, + message_num=message.message_number, + mail_data=mail_data, ) - continue - - if is_read(message, read_messages): - continue - - try: - mail_data = message.binary_data - except error_proto: - logger.exception( - "Unable to RETR message num %s with Message-ID %s in %r", - message.message_number, - message.message_id, - message.mailbox_config, - ) - continue - - read_status, _ = mailbox_config.mail_read_statuses.get_or_create( - message_id=message.message_id, - message_num=message.message_number, - mail_data=mail_data, - ) - - mark_status = MarkStatus(message, read_status) - - try: - message_dto = get_message_dto(message) - except ValueError: - mark_status(MailReadStatuses.UNPROCESSABLE) - logger.exception( - "Unable to convert message num %s with Message-Id %s to DTO in %r", - message.message_number, - message.message_id, - message.mailbox_config, - ) - continue - yield message_dto, mark_status + mark_status = MarkStatus(message, read_status) + + try: + message_dto = get_message_dto(message) + except ValueError: + mark_status(MailReadStatuses.UNPROCESSABLE) + logger.exception( + "Unable to convert message num %s with Message-Id %s to DTO in %r", + message.message_number, + message.message_id, + message.mailbox_config, + ) + continue + + yield message_dto, mark_status From a40ceefbca05b025e4319e99b6257f3a25310d51 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 13:14:36 +0000 Subject: [PATCH 22/38] Update to correctly match output from real mail servers --- mailboxes/tests/test_utils.py | 45 ++++++++++++++++++++++------------- mailboxes/utils.py | 6 +++-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index adf422a9..acd5b068 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -280,12 +280,15 @@ def test_mail_data(self): pop3_connection.retr.assert_called_with("1") +@override_settings( + SPIRE_FROM_ADDRESS="from.spire@example.com", # /PS-IGNORE +) class GetMessageIteratorTests(TestCase): def test_get_message_iterator(self): self.assertEqual(MailboxConfig.objects.count(), 0) mail_server = MagicMock(spec=MailServer) - type(mail_server).username = "test@example.com" # /PS-IGNORE + type(mail_server).user = "test@example.com" # /PS-IGNORE mock_pop3_connection = mail_server.connect_to_pop3().__enter__() mock_pop3_connection.list.return_value = ( @@ -306,7 +309,7 @@ def _top(which, howmuch): msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + return MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() mock_pop3_connection.top.side_effect = _top @@ -317,7 +320,7 @@ def _retr(which): msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + return b"+OK", msg.as_bytes().split(b"\n"), len(msg.as_bytes()) mock_pop3_connection.retr.side_effect = _retr @@ -335,7 +338,7 @@ def _retr(which): subject="abc_xyz_nnn_yyy_1_datetime", body=b"", attachment=[None, None], - raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_1_datetime', b''], 143)", # /PS-IGNORE + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_1_datetime', b'', b''], 148)", # /PS-IGNORE ), EmailMessageDto( run_number=2, @@ -345,7 +348,7 @@ def _retr(which): subject="abc_xyz_nnn_yyy_2_datetime", body=b"", attachment=[None, None], - raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_2_datetime', b'', b''], 148)", # /PS-IGNORE ), EmailMessageDto( run_number=3, @@ -355,7 +358,7 @@ def _retr(which): subject="abc_xyz_nnn_yyy_3_datetime", body=b"", attachment=[None, None], - raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_3_datetime', b''], 143)", # /PS-IGNORE + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_3_datetime', b'', b''], 148)", # /PS-IGNORE ), ], ) @@ -384,14 +387,22 @@ def _retr(which): ) self.assertEqual( bytes(mailbox.mail_read_statuses.all()[0].mail_data), - b"Message-Id: \nTo: to@example.com\nFrom: spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_1_datetime", # /PS-IGNORE + b"Message-Id: \nTo: to@example.com\nFrom: from.spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_1_datetime\n\n", # /PS-IGNORE + ) + self.assertEqual( + bytes(mailbox.mail_read_statuses.all()[1].mail_data), + b"Message-Id: \nTo: to@example.com\nFrom: from.spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_2_datetime\n\n", # /PS-IGNORE + ) + self.assertEqual( + bytes(mailbox.mail_read_statuses.all()[2].mail_data), + b"Message-Id: \nTo: to@example.com\nFrom: from.spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_3_datetime\n\n", # /PS-IGNORE ) def test_get_message_iterator_invalid_senders(self): self.assertEqual(MailboxConfig.objects.count(), 0) mail_server = MagicMock(spec=MailServer) - type(mail_server).username = "test@example.com" # /PS-IGNORE + type(mail_server).user = "test@example.com" # /PS-IGNORE mock_pop3_connection = mail_server.connect_to_pop3().__enter__() mock_pop3_connection.list.return_value = ( @@ -418,7 +429,7 @@ def _top(which, howmuch): msg["From"] = Header(lookup_from[which]) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + return MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() mock_pop3_connection.top.side_effect = _top @@ -429,7 +440,7 @@ def _retr(which): msg["From"] = Header(lookup_from[which]) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + return b"+OK", msg.as_bytes().split(b"\n"), len(msg.as_bytes()) mock_pop3_connection.retr.side_effect = _retr @@ -447,7 +458,7 @@ def _retr(which): subject="abc_xyz_nnn_yyy_2_datetime", body=b"", attachment=[None, None], - raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_2_datetime', b'', b''], 148)", # /PS-IGNORE ), ], ) @@ -465,7 +476,7 @@ def test_get_message_iterator_already_read(self): self.assertEqual(MailboxConfig.objects.count(), 0) mail_server = MagicMock(spec=MailServer) - type(mail_server).username = "test@example.com" # /PS-IGNORE + type(mail_server).user = "test@example.com" # /PS-IGNORE mailbox_config = MailboxConfigFactory(username="test@example.com") # /PS-IGNORE mailbox_config.mail_read_statuses.create( @@ -498,7 +509,7 @@ def _top(which, howmuch): msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return MagicMock(), msg.as_bytes().split(b"\n\n"), MagicMock() + return MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() mock_pop3_connection.top.side_effect = _top @@ -509,7 +520,7 @@ def _retr(which): msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) msg["Date"] = Header("2021-04-23T12:38Z") msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") - return b"+OK", msg.as_bytes().split(b"\n\n"), len(msg.as_bytes()) + return b"+OK", msg.as_bytes().split(b"\n"), len(msg.as_bytes()) mock_pop3_connection.retr.side_effect = _retr @@ -527,7 +538,7 @@ def _retr(which): subject="abc_xyz_nnn_yyy_2_datetime", body=b"", attachment=[None, None], - raw_data=f"(b'+OK', [b'Message-Id: \\nTo: to@example.com\\nFrom: {settings.SPIRE_FROM_ADDRESS}\\nDate: 2021-04-23T12:38Z\\nSubject: abc_xyz_nnn_yyy_2_datetime', b''], 143)", # /PS-IGNORE + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_2_datetime', b'', b''], 148)", # /PS-IGNORE ), ], ) @@ -536,7 +547,7 @@ def test_get_message_iterator_retr_failure(self): self.assertEqual(MailboxConfig.objects.count(), 0) mail_server = MagicMock(spec=MailServer) - type(mail_server).username = "test@example.com" # /PS-IGNORE + type(mail_server).user = "test@example.com" # /PS-IGNORE mock_pop3_connection = mail_server.connect_to_pop3().__enter__() mock_pop3_connection.list.return_value = ( @@ -574,7 +585,7 @@ def test_get_message_iterator_message_dto_failure(self, mock_to_mail_message_dto self.assertEqual(MailboxConfig.objects.count(), 0) mail_server = MagicMock(spec=MailServer) - type(mail_server).username = "test@example.com" # /PS-IGNORE + type(mail_server).user = "test@example.com" # /PS-IGNORE mock_pop3_connection = mail_server.connect_to_pop3().__enter__() mock_pop3_connection.list.return_value = ( diff --git a/mailboxes/utils.py b/mailboxes/utils.py index f2bd8420..f3a41c7f 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -23,7 +23,7 @@ def get_message_header(pop3_connection, msg_num): _, msg_header, _ = pop3_connection.top(msg_num, 0) parser = BytesHeaderParser() - header = parser.parsebytes(b"".join(msg_header)) + header = parser.parsebytes(b"\n".join(msg_header)) return header @@ -53,6 +53,7 @@ def get_read_messages(mailbox_config): def is_from_valid_sender(message, valid_addresses): _, from_address = parseaddr(str(message.message_header["From"])) + logger.info("Found from address %s", from_address) valid_addresses = [address.replace("From: ", "") for address in valid_addresses] return from_address in valid_addresses @@ -97,7 +98,7 @@ def mail_data(self): @cached_property def binary_data(self): - return b"".join(self.mail_data[1]) + return b"\n".join(self.mail_data[1]) def get_messages(pop3_connection, mailbox_config, max_limit): @@ -149,6 +150,7 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, continue if is_read(message, read_messages): + logger.debug("Already read message %s", message.message_id) continue try: From 428dc5d7a9612054784ed1a77fd03b7f342301b5 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 13:36:31 +0000 Subject: [PATCH 23/38] Handle cases where the payload may have slightly changed when getting data --- mailboxes/tests/test_utils.py | 165 ++++++++++++++++++++++++++++++++++ mailboxes/utils.py | 15 +++- 2 files changed, 178 insertions(+), 2 deletions(-) diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index acd5b068..1d06e04b 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -398,6 +398,171 @@ def _retr(which): b"Message-Id: \nTo: to@example.com\nFrom: from.spire@example.com\nDate: 2021-04-23T12:38Z\nSubject: abc_xyz_nnn_yyy_3_datetime\n\n", # /PS-IGNORE ) + def test_get_message_iterator_run_multiple_times(self): + self.assertEqual(MailboxConfig.objects.count(), 0) + + mail_server = MagicMock(spec=MailServer) + type(mail_server).user = "test@example.com" # /PS-IGNORE + + mock_pop3_connection = mail_server.connect_to_pop3().__enter__() + mock_pop3_connection.list.return_value = ( + MagicMock(), + [ + b"1 11111", + b"2 22222", + b"3 33333", + ], + MagicMock(), + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + msg["ChangingValue"] = Header("first-run") + return MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + def _retr(which): + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + msg["ChangingValue"] = Header("first-run") + return b"+OK", msg.as_bytes().split(b"\n"), len(msg.as_bytes()) + + mock_pop3_connection.retr.side_effect = _retr + + iterator = get_message_iterator(mail_server) + dtos, _ = zip(*iterator) + + self.assertEqual( + list(dtos), + [ + EmailMessageDto( + run_number=1, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_1_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_1_datetime', b'ChangingValue: first-run', b'', b''], 173)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=2, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_2_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_2_datetime', b'ChangingValue: first-run', b'', b''], 173)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=3, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_3_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_3_datetime', b'ChangingValue: first-run', b'', b''], 173)", # /PS-IGNORE + ), + ], + ) + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [ + "", + "", + "", + ], + transform=repr, + ) + + def _top(which, howmuch): + self.assertEqual(howmuch, 0) + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + msg["ChangingValue"] = Header("second-run") + return MagicMock(), msg.as_bytes().split(b"\n"), MagicMock() + + mock_pop3_connection.top.side_effect = _top + + def _retr(which): + msg = Message() + msg["Message-Id"] = Header(f"") # /PS-IGNORE + msg["To"] = Header("to@example.com") # /PS-IGNORE + msg["From"] = Header(settings.SPIRE_FROM_ADDRESS) + msg["Date"] = Header("2021-04-23T12:38Z") + msg["Subject"] = Header(f"abc_xyz_nnn_yyy_{which}_datetime") + msg["ChangingValue"] = Header("second-run") + return b"+OK", msg.as_bytes().split(b"\n"), len(msg.as_bytes()) + + mock_pop3_connection.retr.side_effect = _retr + + iterator = get_message_iterator(mail_server) + dtos, _ = zip(*iterator) + + self.assertEqual( + list(dtos), + [ + EmailMessageDto( + run_number=1, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_1_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_1_datetime', b'ChangingValue: second-run', b'', b''], 174)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=2, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_2_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_2_datetime', b'ChangingValue: second-run', b'', b''], 174)", # /PS-IGNORE + ), + EmailMessageDto( + run_number=3, + sender=settings.SPIRE_FROM_ADDRESS, + receiver="to@example.com", # /PS-IGNORE + date=datetime.datetime(2021, 4, 23, 12, 38, tzinfo=tzlocal()), + subject="abc_xyz_nnn_yyy_3_datetime", + body=b"", + attachment=[None, None], + raw_data=f"(b'+OK', [b'Message-Id: ', b'To: to@example.com', b'From: {settings.SPIRE_FROM_ADDRESS}', b'Date: 2021-04-23T12:38Z', b'Subject: abc_xyz_nnn_yyy_3_datetime', b'ChangingValue: second-run', b'', b''], 174)", # /PS-IGNORE + ), + ], + ) + mailbox = MailboxConfig.objects.get(username="test@example.com") # /PS-IGNORE + self.assertQuerySetEqual( + mailbox.mail_read_statuses.order_by("message_id"), + [ + "", + "", + "", + ], + transform=repr, + ) + def test_get_message_iterator_invalid_senders(self): self.assertEqual(MailboxConfig.objects.count(), 0) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index f3a41c7f..27f24a9f 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -164,10 +164,21 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, ) continue - read_status, _ = mailbox_config.mail_read_statuses.get_or_create( + logger.debug( + "About to create or update mail_read_status for %s (%s)", message.message_id, message.message_number + ) + read_status, created = mailbox_config.mail_read_statuses.update_or_create( message_id=message.message_id, message_num=message.message_number, - mail_data=mail_data, + defaults={ + "mail_data": mail_data, + }, + ) + logger.debug( + "%s read_status for %s (%s)", + "Created" if created else "Updated", + message.message_id, + message.message_number, ) mark_status = MarkStatus(message, read_status) From 8236a8b6010a5d4939ae7c41e6fbd47ada2d383b Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 14:40:47 +0000 Subject: [PATCH 24/38] By default only run unit tests --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index e135ddfd..da3efc85 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] DJANGO_SETTINGS_MODULE = conf.settings addopts = - -s --ignore=django_db_anonymiser + -s --ignore=django_db_anonymiser -k "not integration and not end_to_end and not anonymised_db_dumps" From 444a1d7e267fdb3ea7989d735cda5f9f87cbfc69 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 14:53:34 +0000 Subject: [PATCH 25/38] Add integration tests --- mail_servers/tests/test_integration.py | 42 ++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 mail_servers/tests/test_integration.py diff --git a/mail_servers/tests/test_integration.py b/mail_servers/tests/test_integration.py new file mode 100644 index 00000000..b0b0f9c8 --- /dev/null +++ b/mail_servers/tests/test_integration.py @@ -0,0 +1,42 @@ +from django.test import TestCase, override_settings +from parameterized import parameterized + +from mail_servers.utils import get_mail_server + + +@override_settings( + MAIL_SERVERS={ + "spire_to_dit": { + "HOSTNAME": "spire-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "spire-to-dit-user", + "password": "password", + }, + }, + "hmrc_to_dit": { + "HOSTNAME": "hmrc-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "hmrc-to-dit-user", + "password": "password", + }, + }, + }, +) +class IntegrationTests(TestCase): + @parameterized.expand( + [ + "spire_to_dit", + "hmrc_to_dit", + ] + ) + def test_mail_server(self, mail_server_key): + mail_server = get_mail_server(mail_server_key) + with mail_server.connect_to_pop3() as pop3_connection: + self.assertEqual( + pop3_connection.welcome.decode("ascii"), + "+OK Mailpit POP3 server", + ) From 401efa863251c6b843067cbd3f860fa57c0d156c Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 15:09:30 +0000 Subject: [PATCH 26/38] Split out tests into separate jobs --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3d62ea06..e50560bf 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -88,7 +88,7 @@ jobs: name: Set Environment File command: cp docker.env .env - run: - name: Run tests + name: Run tests on Postgres 13 command: docker compose run --build lite-hmrc-intg pipenv run pytest -k end_to_end tests_e2e_postgres13: From afccf54152c5cc3bb6bfae3f25ee2bedef906891 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 15:18:13 +0000 Subject: [PATCH 27/38] Add integration tests to circleci --- .circleci/config.yml | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index e50560bf..7df59ac4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -114,6 +114,52 @@ jobs: name: Run tests command: docker compose -f docker-compose.yml -f docker-compose.postgres13.yml run --build lite-hmrc-intg pipenv run pytest -k end_to_end + tests_integration: + machine: + image: ubuntu-2204:current + docker_layer_caching: true + resource_class: small + environment: + PIPENV_YES: 1 + steps: + - checkout + - attach_workspace: + at: ~/repo/tmp + - run: + name: Git Submodule Checkout + command: | + git submodule sync + git submodule update --init + - run: + name: Set Environment File + command: cp docker.env .env + - run: + name: Run tests + command: docker compose run --build lite-hmrc-intg pipenv run pytest -k integration + + tests_integration_postgres13: + machine: + image: ubuntu-2204:current + docker_layer_caching: true + resource_class: small + environment: + PIPENV_YES: 1 + steps: + - checkout + - attach_workspace: + at: ~/repo/tmp + - run: + name: Git Submodule Checkout + command: | + git submodule sync + git submodule update --init + - run: + name: Set Environment File + command: cp docker.env .env + - run: + name: Run tests + command: docker compose -f docker-compose.yml -f docker-compose.postgres13.yml run --build lite-hmrc-intg pipenv run pytest -k integration + check_migrations: docker: - image: cimg/python:3.9.18 @@ -195,6 +241,8 @@ workflows: - tests_postgres13 - tests_e2e - tests_e2e_postgres13 + - tests_integration + - tests_integration_postgres13 - check_migrations - linting - check_coverage: From 6b8252c2d3c5ab480e5893fb2dbb7b6d4b39b863 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 16:17:54 +0000 Subject: [PATCH 28/38] Add integration tests for message iterator --- mailboxes/tests/test_integration.py | 112 ++++++++++++++++++++++++++++ mailboxes/utils.py | 1 + 2 files changed, 113 insertions(+) create mode 100644 mailboxes/tests/test_integration.py diff --git a/mailboxes/tests/test_integration.py b/mailboxes/tests/test_integration.py new file mode 100644 index 00000000..9b5484f7 --- /dev/null +++ b/mailboxes/tests/test_integration.py @@ -0,0 +1,112 @@ +from unittest.mock import ANY + +import requests +from django.conf import settings +from django.test import TestCase, override_settings + +from mail.libraries.email_message_dto import EmailMessageDto +from mail_servers.utils import get_mail_server +from mailboxes.models import MailboxConfig +from mailboxes.utils import get_message_iterator + + +@override_settings( + MAIL_SERVERS={ + "spire_to_dit": { + "HOSTNAME": "spire-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "spire-to-dit-user", + "password": "password", + }, + }, + "hmrc_to_dit": { + "HOSTNAME": "hmrc-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "hmrc-to-dit-user", + "password": "password", + }, + }, + }, + SPIRE_FROM_ADDRESS="from.spire@example.com", # /PS-IGNORE + HMRC_TO_DIT_REPLY_ADDRESS="from.hmrc@example.com", # /PS-IGNORE +) +class IntegrationTests(TestCase): + def setUp(self): + super().setUp() + + requests.delete("http://spire-to-dit-mailserver:8025/api/v1/messages") + requests.delete("http://hmrc-to-dit-mailserver:8025/api/v1/messages") + + def test_get_message_iterator_creates_mailbox_config(self): + self.assertFalse(MailboxConfig.objects.exists()) + + mail_server = get_mail_server("spire_to_dit") + messages = get_message_iterator(mail_server) + self.assertEqual(list(messages), []) + self.assertEqual(MailboxConfig.objects.filter(username="spire-to-dit-user").count(), 1) + + mail_server = get_mail_server("hmrc_to_dit") + messages = get_message_iterator(mail_server) + self.assertEqual(list(messages), []) + self.assertEqual(MailboxConfig.objects.filter(username="hmrc-to-dit-user").count(), 1) + + def test_get_message_iterator_creates_read_statuses(self): + requests.post( + "http://spire-to-dit-mailserver:8025/api/v1/send", + json={ + "From": {"Email": settings.SPIRE_FROM_ADDRESS, "Name": "SPIRE"}, + "Subject": "abc_xyz_nnn_yyy_1111_datetime", + "To": [{"Email": "lite@example.com", "Name": "LITE"}], # /PS-IGNORE + }, + ) + mail_server = get_mail_server("spire_to_dit") + messages, _ = zip(*get_message_iterator(mail_server)) + self.assertEqual( + list(messages), + [ + EmailMessageDto( + run_number=1111, + sender='"SPIRE" ', # /PS-IGNORE + receiver='"LITE" ', # /PS-IGNORE + date=ANY, + subject="abc_xyz_nnn_yyy_1111_datetime", + body="", + attachment=[None, None], + raw_data=ANY, + ) + ], + ) + mailbox_config = MailboxConfig.objects.get(username="spire-to-dit-user") + self.assertEqual(mailbox_config.mail_read_statuses.count(), 1) + + requests.post( + "http://hmrc-to-dit-mailserver:8025/api/v1/send", + json={ + "From": {"Email": settings.HMRC_TO_DIT_REPLY_ADDRESS, "Name": "HMRC"}, + "Subject": "abc_xyz_nnn_yyy_2222_datetime", + "To": [{"Email": "lite@example.com", "Name": "LITE"}], # /PS-IGNORE + }, + ) + mail_server = get_mail_server("hmrc_to_dit") + messages, _ = zip(*get_message_iterator(mail_server)) + self.assertEqual( + list(messages), + [ + EmailMessageDto( + run_number=2222, + sender='"HMRC" ', # /PS-IGNORE + receiver='"LITE" ', # /PS-IGNORE + date=ANY, + subject="abc_xyz_nnn_yyy_2222_datetime", + body="", + attachment=[None, None], + raw_data=ANY, + ) + ], + ) + mailbox_config = MailboxConfig.objects.get(username="hmrc-to-dit-user") + self.assertEqual(mailbox_config.mail_read_statuses.count(), 1) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 27f24a9f..9676647b 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -103,6 +103,7 @@ def binary_data(self): def get_messages(pop3_connection, mailbox_config, max_limit): _, mails, _ = pop3_connection.list() + logger.debug(mails) mails = mails[-max_limit:] for mail in mails: message_number = get_message_number(mail) From c5be73e21eac03749b6bbee56d418c2e0d83943c Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 16:48:18 +0000 Subject: [PATCH 29/38] Set better default value for database url --- local.env | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/local.env b/local.env index 7326205f..ca131bc6 100644 --- a/local.env +++ b/local.env @@ -1,5 +1,5 @@ DEBUG=True -DATABASE_URL=postgres://postgres:password@localhost:5432/postgres +DATABASE_URL=postgres://postgres:password@lite-hmrc-postgres:5432/postgres DJANGO_SECRET_KEY=DJANGO_SECRET_KEY EMAIL_HOSTNAME=localhost EMAIL_USER=outbox-user From ad95aceb926aef2f9d541e597a82a35aea2ff5b0 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 16:53:02 +0000 Subject: [PATCH 30/38] Set distinct URL values in env and docker compose --- docker-compose.yml | 3 +++ local.env | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 9395ce1e..1cadcc1f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,9 @@ services: container_name: "lite-hmrc-intg" env_file: - .env + environment: + - DATABASE_URL=postgres://postgres:password@lite-hmrc-postgres:5432/postgres + - REDIS_BASE_URL=redis://hmrc-redis:6379 build: . platform: linux/amd64 volumes: diff --git a/local.env b/local.env index ca131bc6..7326205f 100644 --- a/local.env +++ b/local.env @@ -1,5 +1,5 @@ DEBUG=True -DATABASE_URL=postgres://postgres:password@lite-hmrc-postgres:5432/postgres +DATABASE_URL=postgres://postgres:password@localhost:5432/postgres DJANGO_SECRET_KEY=DJANGO_SECRET_KEY EMAIL_HOSTNAME=localhost EMAIL_USER=outbox-user From 6353d46882d2e04f0c4ddf242e6e08529cae7efd Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 24 Dec 2024 17:53:34 +0000 Subject: [PATCH 31/38] Use docker.env file when running integration tests --- docker-compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 1cadcc1f..9395ce1e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,9 +14,6 @@ services: container_name: "lite-hmrc-intg" env_file: - .env - environment: - - DATABASE_URL=postgres://postgres:password@lite-hmrc-postgres:5432/postgres - - REDIS_BASE_URL=redis://hmrc-redis:6379 build: . platform: linux/amd64 volumes: From 3551c71a2940b121c0b9a440319be0e67daa74a4 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 31 Dec 2024 11:09:14 +0000 Subject: [PATCH 32/38] Run all tests locally and ensure circleci doesn't run integration tests with unit tests --- .circleci/config.yml | 4 ++-- pytest.ini | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 7df59ac4..e01b9a3f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -41,7 +41,7 @@ jobs: - setup - run: name: Run tests - command: pipenv run pytest --cov=. --cov-config=.coveragerc -k "not end_to_end" + command: pipenv run pytest --cov=. --cov-config=.coveragerc -k "not end_to_end and not integration" - run: name: Rename coverage file command: mkdir coverage-output && cp .coverage coverage-output/.coverage @@ -66,7 +66,7 @@ jobs: - setup - run: name: Run tests on Postgres 13 - command: pipenv run pytest -k "not end_to_end" + command: pipenv run pytest -k "not end_to_end and not integration" tests_e2e: machine: diff --git a/pytest.ini b/pytest.ini index da3efc85..e135ddfd 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] DJANGO_SETTINGS_MODULE = conf.settings addopts = - -s --ignore=django_db_anonymiser -k "not integration and not end_to_end and not anonymised_db_dumps" + -s --ignore=django_db_anonymiser From 1b51caac4555806c7d4b2db9f068220fe6adcea6 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 31 Dec 2024 11:13:13 +0000 Subject: [PATCH 33/38] Remove unused mock_hmrc app --- codecov.yml | 2 - conf/settings.py | 19 --- conf/urls.py | 4 - docker.env | 8 -- local.env | 7 - mail/libraries/routing_controller.py | 6 - .../libraries/test_routing_controller.py | 41 +----- mock_hmrc/__init__.py | 0 mock_hmrc/apps.py | 13 -- mock_hmrc/data_processors.py | 126 ------------------ mock_hmrc/enums.py | 24 ---- mock_hmrc/handler.py | 52 -------- mock_hmrc/migrations/0001_initial.py | 62 --------- .../migrations/0002_auto_20210505_1513.py | 26 ---- mock_hmrc/migrations/__init__.py | 0 mock_hmrc/models.py | 25 ---- mock_hmrc/tasks.py | 17 --- mock_hmrc/tests.py | 3 - mock_hmrc/urls.py | 13 -- mock_hmrc/views.py | 14 -- 20 files changed, 2 insertions(+), 460 deletions(-) delete mode 100644 mock_hmrc/__init__.py delete mode 100644 mock_hmrc/apps.py delete mode 100644 mock_hmrc/data_processors.py delete mode 100644 mock_hmrc/enums.py delete mode 100644 mock_hmrc/handler.py delete mode 100644 mock_hmrc/migrations/0001_initial.py delete mode 100644 mock_hmrc/migrations/0002_auto_20210505_1513.py delete mode 100644 mock_hmrc/migrations/__init__.py delete mode 100644 mock_hmrc/models.py delete mode 100644 mock_hmrc/tasks.py delete mode 100644 mock_hmrc/tests.py delete mode 100644 mock_hmrc/urls.py delete mode 100644 mock_hmrc/views.py diff --git a/codecov.yml b/codecov.yml index 5c0f3953..78621592 100644 --- a/codecov.yml +++ b/codecov.yml @@ -12,5 +12,3 @@ coverage: if_ci_failed: error only_pulls: false patch: off -ignore: - - "mock_hmrc" diff --git a/conf/settings.py b/conf/settings.py index 6d77e8d0..58d67ef2 100644 --- a/conf/settings.py +++ b/conf/settings.py @@ -92,11 +92,6 @@ WSGI_APPLICATION = "conf.wsgi.application" - -ENABLE_MOCK_HMRC_SERVICE = env.bool("ENABLE_MOCK_HMRC_SERVICE", default=False) -if ENABLE_MOCK_HMRC_SERVICE: - INSTALLED_APPS += ["mock_hmrc.apps.MockHmrcConfig"] - # Which system identifier to use in licence requests to HMRC's CHIEF system. # LITE (and SPIRE) uses "SPIRE". ICMS uses "ILBDOTI". CHIEF_SOURCE_SYSTEM = env("CHIEF_SOURCE_SYSTEM", default="SPIRE") @@ -114,11 +109,6 @@ OUTGOING_EMAIL_USER = env("OUTGOING_EMAIL_USER") -MOCK_HMRC_EMAIL_PASSWORD = env("MOCK_HMRC_EMAIL_PASSWORD", default="") -MOCK_HMRC_EMAIL_HOSTNAME = env("MOCK_HMRC_EMAIL_HOSTNAME", default="") -MOCK_HMRC_EMAIL_USER = env("MOCK_HMRC_EMAIL_USER", default="") -MOCK_HMRC_EMAIL_POP3_PORT = env("MOCK_HMRC_EMAIL_POP3_PORT", default=None) - SPIRE_STANDIN_EMAIL_PASSWORD = env("SPIRE_STANDIN_EMAIL_PASSWORD", default="") SPIRE_STANDIN_EMAIL_HOSTNAME = env("SPIRE_STANDIN_EMAIL_HOSTNAME", default="") SPIRE_STANDIN_EMAIL_USER = env("SPIRE_STANDIN_EMAIL_USER", default="") @@ -166,15 +156,6 @@ "tenant_id": AZURE_AUTH_TENANT_ID, }, }, - "mock_hmrc": { - "HOSTNAME": MOCK_HMRC_EMAIL_HOSTNAME, - "POP3_PORT": MOCK_HMRC_EMAIL_POP3_PORT, - "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", - "AUTHENTICATION_OPTIONS": { - "user": MOCK_HMRC_EMAIL_USER, - "password": MOCK_HMRC_EMAIL_PASSWORD, - }, - }, } TIME_TESTS = env.bool("TIME_TESTS", default=False) diff --git a/conf/urls.py b/conf/urls.py index 6e176db8..6d0e8c72 100644 --- a/conf/urls.py +++ b/conf/urls.py @@ -14,7 +14,6 @@ 2. Add a URL to urlpatterns: path('blog/', include('blog.urls')) """ -from django.conf import settings from django.contrib import admin from django.urls import include, path @@ -27,6 +26,3 @@ path("pingdom/ping.xml", HealthCheckPingdomView.as_view(), name="healthcheck-pingdom"), path("service-available-check/", ServiceAvailableHealthCheckView.as_view(), name="service-available-check"), ] - -if settings.ENABLE_MOCK_HMRC_SERVICE: # pragma: no cover - urlpatterns += [path("mock-hmrc/", include("mock_hmrc.urls"))] diff --git a/docker.env b/docker.env index a6bed92b..f991ea5f 100644 --- a/docker.env +++ b/docker.env @@ -21,8 +21,6 @@ HAWK_AUTHENTICATION_ENABLED=False LITE_HMRC_INTEGRATION_HAWK_KEY=LITE_HMRC_INTEGRATION_HAWK_KEY LITE_API_HAWK_KEY=LITE_API_HAWK_KEY -ENABLE_MOCK_HMRC_SERVICE=False - INCOMING_EMAIL_PASSWORD=password INCOMING_EMAIL_HOSTNAME=localhost INCOMING_EMAIL_USER=test_user @@ -37,12 +35,6 @@ HMRC_TO_DIT_EMAIL_SMTP_PORT=587 OUTGOING_EMAIL_USER=test_user -MOCK_HMRC_EMAIL_PASSWORD=password -MOCK_HMRC_EMAIL_HOSTNAME=localhost -MOCK_HMRC_EMAIL_USER=test_user -MOCK_HMRC_EMAIL_POP3_PORT=995 -MOCK_HMRC_EMAIL_SMTP_PORT=587 - SPIRE_STANDIN_EMAIL_HOSTNAME=localhost SPIRE_STANDIN_EMAIL_USER=test_spire SPIRE_STANDIN_EMAIL_PASSWORD=password diff --git a/local.env b/local.env index 7326205f..52fc8456 100644 --- a/local.env +++ b/local.env @@ -20,8 +20,6 @@ HAWK_AUTHENTICATION_ENABLED=False LITE_HMRC_INTEGRATION_HAWK_KEY=LITE_HMRC_INTEGRATION_HAWK_KEY LITE_API_HAWK_KEY=LITE_API_HAWK_KEY -ENABLE_MOCK_HMRC_SERVICE=False - CHIEF_SOURCE_SYSTEM=SPIRE INCOMING_EMAIL_PASSWORD=password @@ -36,11 +34,6 @@ HMRC_TO_DIT_EMAIL_POP3_PORT=995 OUTGOING_EMAIL_USER=test_user -MOCK_HMRC_EMAIL_PASSWORD=password -MOCK_HMRC_EMAIL_HOSTNAME=localhost -MOCK_HMRC_EMAIL_USER=test_user -MOCK_HMRC_EMAIL_POP3_PORT=995 - SPIRE_STANDIN_EMAIL_HOSTNAME=localhost SPIRE_STANDIN_EMAIL_USER=test_spire SPIRE_STANDIN_EMAIL_PASSWORD=password diff --git a/mail/libraries/routing_controller.py b/mail/libraries/routing_controller.py index 4567f140..4a501c44 100644 --- a/mail/libraries/routing_controller.py +++ b/mail/libraries/routing_controller.py @@ -51,12 +51,6 @@ def get_hmrc_to_dit_mailserver() -> MailServer: return mail_server -def get_mock_hmrc_mailserver() -> MailServer: - mail_server = get_mail_server("mock") - - return mail_server - - def check_and_route_emails(): logger.info("Checking for emails") hmrc_to_dit_server = get_hmrc_to_dit_mailserver() diff --git a/mail/tests/libraries/test_routing_controller.py b/mail/tests/libraries/test_routing_controller.py index f8124b73..81dfb2fe 100644 --- a/mail/tests/libraries/test_routing_controller.py +++ b/mail/tests/libraries/test_routing_controller.py @@ -3,12 +3,8 @@ from django.test import override_settings -from mail.libraries.routing_controller import ( - get_hmrc_to_dit_mailserver, - get_mock_hmrc_mailserver, - get_spire_to_dit_mailserver, -) -from mail_servers.auth import BasicAuthentication, ModernAuthentication +from mail.libraries.routing_controller import get_hmrc_to_dit_mailserver, get_spire_to_dit_mailserver +from mail_servers.auth import ModernAuthentication class RoutingControllerTest(unittest.TestCase): @@ -85,36 +81,3 @@ def test_get_hmrc_to_dit_mailserver(self, mock_MailServer, mock_ModernAuthentica ) self.assertEqual(hmrc_to_dit_mailserver, mock_MailServer()) - - @patch( - "mail_servers.auth.BasicAuthentication", - spec=BasicAuthentication, - ) - @patch("mail_servers.utils.MailServer") - @override_settings( - MAIL_SERVERS={ - "mock": { - "HOSTNAME": "host.example.com", - "POP3_PORT": 123, - "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", - "AUTHENTICATION_OPTIONS": { - "user": "mock.hmrc.email.user@example.com", - "password": "shhh", - }, - }, - } - ) - def test_get_mock_hmrc_mailserver(self, mock_MailServer, mock_BasicAuthentication): - mock_hmrc_mailserver = get_mock_hmrc_mailserver() - - mock_BasicAuthentication.assert_called_with( - user="mock.hmrc.email.user@example.com", - password="shhh", - ) - mock_MailServer.assert_called_with( - mock_BasicAuthentication(), - hostname="host.example.com", - pop3_port=123, - ) - - self.assertEqual(mock_hmrc_mailserver, mock_MailServer()) diff --git a/mock_hmrc/__init__.py b/mock_hmrc/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mock_hmrc/apps.py b/mock_hmrc/apps.py deleted file mode 100644 index ed9d5b0c..00000000 --- a/mock_hmrc/apps.py +++ /dev/null @@ -1,13 +0,0 @@ -from django.apps import AppConfig -from django.db.models.signals import post_migrate - - -class MockHmrcConfig(AppConfig): - name = "mock_hmrc" - - @classmethod - def initialize_send_licence_usage_figures_to_lite_api(cls, **kwargs): - pass - - def ready(self): - post_migrate.connect(self.initialize_send_licence_usage_figures_to_lite_api, sender=self) diff --git a/mock_hmrc/data_processors.py b/mock_hmrc/data_processors.py deleted file mode 100644 index 43ecee38..00000000 --- a/mock_hmrc/data_processors.py +++ /dev/null @@ -1,126 +0,0 @@ -import json -import logging - -from django.conf import settings -from django.utils import timezone - -from mail.enums import ExtractTypeEnum -from mail.libraries.data_processors import convert_dto_data_for_serialization -from mail.libraries.email_message_dto import EmailMessageDto -from mail.libraries.helpers import get_extract_type -from mail.libraries.routing_controller import send -from mock_hmrc import enums, models - -MOCK_HMRC_SUPPORTED_EXTRACT_TYPES = [ExtractTypeEnum.LICENCE_DATA] - - -def update_retrieved_email_status(dto, status): - data = {"message_id": dto.message_id, "sender": dto.sender, "status": status} - models.RetrievedMail.objects.get_or_create(**data) - - -def save_hmrc_email_message_data(dto): - extract_type = get_extract_type(dto.subject) - if not extract_type: - update_retrieved_email_status(dto, enums.RetrievedEmailStatusEnum.INVALID) - logging.info("Extract type not supported (%s), skipping", dto.subject) - return None - - data = convert_dto_data_for_serialization(dto, extract_type) - - # ensure there is run number - if data["licence_data"]["source_run_number"] is None: - logging.error("Invalid email received") - update_retrieved_email_status(dto, enums.RetrievedEmailStatusEnum.INVALID) - return None - - hmrc_mail, _ = models.HmrcMail.objects.get_or_create( - extract_type=extract_type, - source_run_number=data["licence_data"]["source_run_number"], - defaults={ - "source": data["licence_data"]["source"], - "edi_filename": data["edi_filename"], - "edi_data": data["edi_data"], - "licence_ids": data["licence_data"]["licence_ids"], - }, - ) - update_retrieved_email_status(dto, enums.RetrievedEmailStatusEnum.VALID) - - return hmrc_mail - - -def build_reply_pending_filename(filename): - reply_identifier = "" - filename = filename.split("_") - extract_identifier = filename[3] - - if extract_identifier == "licenceData": - reply_identifier = "licenceReply" - - # provide default value if it is unknown - if reply_identifier == "": - reply_identifier = "licenceUnknown" - - filename[3] = reply_identifier - reply_filename = "_".join(filename) - - return reply_filename - - -def build_reply_pending_file_data(mail): - """ - Builds a reply file that looks like an actual reply from HMRC - - Since we are simulating this only some of the cases are included. - Eg rejected and error cases are not considered. - """ - line_num = 1 - reply_created_time = timezone.localtime().strftime("%Y%m%d%H%M") - data = [] - data.append(f"{line_num}\\fileHeader\\CHIEF\\SPIRE\\licenceReply\\{reply_created_time}\\{mail.source_run_number}") - accepted_ids = json.loads(mail.licence_ids) - for index, id in enumerate(accepted_ids, start=1): - line_num += index - data.append(f"{line_num}\\accepted\\{id}") - - data.append(f"{line_num}\\fileTrailer\\{len(accepted_ids)}\\0\\0") - file_data = "\n".join(data) - - return file_data - - -def build_reply_mail_message_dto(mail) -> EmailMessageDto: - if mail.extract_type not in MOCK_HMRC_SUPPORTED_EXTRACT_TYPES: - return None - - sender = settings.MOCK_HMRC_EMAIL_USER - receiver = settings.SPIRE_STANDIN_EMAIL_USER - attachment = [ - build_reply_pending_filename(mail.edi_filename), - build_reply_pending_file_data(mail), - ] - - return EmailMessageDto( - run_number=mail.source_run_number, - sender=sender, - receiver=receiver, - subject=attachment[0], - body=None, - attachment=attachment, - raw_data=None, - ) - - -def to_email_message_dto_from(hmrc_mail): - if hmrc_mail.status == enums.HmrcMailStatusEnum.ACCEPTED: - return build_reply_mail_message_dto(hmrc_mail) - - return None - - -def send_reply(email): - message_to_send = to_email_message_dto_from(email) - if message_to_send: - send(message_to_send) - email.status = enums.HmrcMailStatusEnum.REPLIED - email.save() diff --git a/mock_hmrc/enums.py b/mock_hmrc/enums.py deleted file mode 100644 index 2cc5c364..00000000 --- a/mock_hmrc/enums.py +++ /dev/null @@ -1,24 +0,0 @@ -class RetrievedEmailStatusEnum: - VALID = "valid" - INVALID = "invalid" - - choices = [ - (VALID, "Valid"), - (INVALID, "Invalid"), - ] - - -class HmrcMailStatusEnum: - ACCEPTED = "accepted" - REJECTED = "rejected" - REPLIED = "replied" - RETRY = "retry" - FAILED = "failed" - - choices = [ - (ACCEPTED, "Accepted"), - (REJECTED, "Rejected"), - (REPLIED, "Replied"), - (RETRY, "Retry"), - (FAILED, "Failed"), - ] diff --git a/mock_hmrc/handler.py b/mock_hmrc/handler.py deleted file mode 100644 index c7a96215..00000000 --- a/mock_hmrc/handler.py +++ /dev/null @@ -1,52 +0,0 @@ -import logging - -from django.conf import settings - -from mail.libraries.helpers import to_hmrc_mail_message_dto -from mail.libraries.routing_controller import get_mock_hmrc_mailserver -from mailboxes.utils import get_message_id -from mock_hmrc import enums, models -from mock_hmrc.data_processors import save_hmrc_email_message_data, send_reply - - -def get_hmrc_email_message_dto(server): - with server.connect_to_pop3() as conn: - _, mails, _ = conn.list() - message_ids = [get_message_id(line.decode(settings.DEFAULT_ENCODING)) for line in mails] - - if models.RetrievedMail.objects.count(): - recent_mail = models.RetrievedMail.objects.all().order_by("message_id").last() - target_id = None - for msg_id in message_ids: - if msg_id > recent_mail.message_id: - target_id = msg_id - break - else: - target_id = message_ids[0] - - dto = None - if target_id: - email = conn.retr(target_id) - dto = to_hmrc_mail_message_dto(target_id, email) - - return dto - - -def select_email_to_reply(): - return models.HmrcMail.objects.filter(status=enums.HmrcMailStatusEnum.ACCEPTED).order_by("created_at").first() - - -def parse_and_reply_emails(): - server = get_mock_hmrc_mailserver() - email_dto = get_hmrc_email_message_dto(server) - if email_dto: - email_instance = save_hmrc_email_message_data(email_dto) - if email_instance: - send_reply(email_instance) - else: - logging.info("No emails to process or invalid") - - # If no new emails, check if there are any pending replies - email = select_email_to_reply() - if email: - send_reply(email) diff --git a/mock_hmrc/migrations/0001_initial.py b/mock_hmrc/migrations/0001_initial.py deleted file mode 100644 index 22ca5446..00000000 --- a/mock_hmrc/migrations/0001_initial.py +++ /dev/null @@ -1,62 +0,0 @@ -# Generated by Django 2.2.17 on 2021-02-01 16:24 - -import uuid - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - initial = True - - dependencies = [] - - operations = [ - migrations.CreateModel( - name="HmrcMail", - fields=[ - ("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ( - "status", - models.TextField( - choices=[ - ("accepted", "Accepted"), - ("rejected", "Rejected"), - ("replied", "Replied"), - ("retry", "Retry"), - ("failed", "Failed"), - ], - default="accepted", - ), - ), - ( - "extract_type", - models.TextField( - choices=[ - ("usage_update", "Usage update"), - ("usage_reply", "Usage Reply"), - ("licence_update", "Licence Update"), - ("licence_reply", "Licence Reply"), - ("licence_data", "Licence Data"), - ], - null=True, - ), - ), - ("source", models.TextField()), - ("source_run_number", models.IntegerField()), - ("edi_filename", models.TextField()), - ("edi_data", models.TextField()), - ("licence_ids", models.TextField()), - ], - ), - migrations.CreateModel( - name="RetrievedMail", - fields=[ - ("id", models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), - ("message_id", models.TextField()), - ("sender", models.TextField()), - ("status", models.TextField(choices=[("valid", "Valid"), ("invalid", "Invalid")], default="invalid")), - ], - ), - ] diff --git a/mock_hmrc/migrations/0002_auto_20210505_1513.py b/mock_hmrc/migrations/0002_auto_20210505_1513.py deleted file mode 100644 index b601b5e7..00000000 --- a/mock_hmrc/migrations/0002_auto_20210505_1513.py +++ /dev/null @@ -1,26 +0,0 @@ -# Generated by Django 2.2.18 on 2021-05-05 15:13 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("mock_hmrc", "0001_initial"), - ] - - operations = [ - migrations.AlterField( - model_name="hmrcmail", - name="extract_type", - field=models.TextField( - choices=[ - ("usage_data", "Usage Data"), - ("usage_reply", "Usage Reply"), - ("licence_reply", "Licence Reply"), - ("licence_data", "Licence Data"), - ], - null=True, - ), - ), - ] diff --git a/mock_hmrc/migrations/__init__.py b/mock_hmrc/migrations/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/mock_hmrc/models.py b/mock_hmrc/models.py deleted file mode 100644 index 89ee8eba..00000000 --- a/mock_hmrc/models.py +++ /dev/null @@ -1,25 +0,0 @@ -import uuid - -from django.db import models - -from mail.enums import ExtractTypeEnum -from mock_hmrc.enums import HmrcMailStatusEnum, RetrievedEmailStatusEnum - - -class RetrievedMail(models.Model): - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - message_id = models.TextField() - sender = models.TextField() - status = models.TextField(choices=RetrievedEmailStatusEnum.choices, default=RetrievedEmailStatusEnum.INVALID) - - -class HmrcMail(models.Model): - id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) - created_at = models.DateTimeField(auto_now_add=True, blank=True) - status = models.TextField(choices=HmrcMailStatusEnum.choices, default=HmrcMailStatusEnum.ACCEPTED) - extract_type = models.TextField(choices=ExtractTypeEnum.choices, null=True) - source = models.TextField() - source_run_number = models.IntegerField() - edi_filename = models.TextField() - edi_data = models.TextField() - licence_ids = models.TextField() diff --git a/mock_hmrc/tasks.py b/mock_hmrc/tasks.py deleted file mode 100644 index 0138af66..00000000 --- a/mock_hmrc/tasks.py +++ /dev/null @@ -1,17 +0,0 @@ -import logging - -from mock_hmrc.handler import parse_and_reply_emails - - -def handle_replies(): - logging.info("Polling mock HMRC inbox for updates") - - try: - parse_and_reply_emails() - except Exception as exc: # noqa - logging.error( - "An unexpected error occurred when polling inbox for updates -> %s", - type(exc).__name__, - exc_info=True, - ) - raise exc diff --git a/mock_hmrc/tests.py b/mock_hmrc/tests.py deleted file mode 100644 index 7ce503c2..00000000 --- a/mock_hmrc/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/mock_hmrc/urls.py b/mock_hmrc/urls.py deleted file mode 100644 index c8fa083b..00000000 --- a/mock_hmrc/urls.py +++ /dev/null @@ -1,13 +0,0 @@ -from django.conf import settings -from django.urls import path - -from mock_hmrc import views - -app_name = "mock_hmrc" - -urlpatterns = [] - -if settings.DEBUG: - urlpatterns = [ - path("handle-replies/", views.HandleReplies.as_view(), name="handle_replies"), - ] diff --git a/mock_hmrc/views.py b/mock_hmrc/views.py deleted file mode 100644 index dc4fc63c..00000000 --- a/mock_hmrc/views.py +++ /dev/null @@ -1,14 +0,0 @@ -from django.http import HttpResponse -from rest_framework import status -from rest_framework.views import APIView - -from mock_hmrc.tasks import handle_replies - -# Create your views here. - - -class HandleReplies(APIView): - def get(self, request): - handle_replies() - - return HttpResponse(status=status.HTTP_200_OK) From 310693faebe4917dc604842584cedcc0dcce885e Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 31 Dec 2024 11:14:38 +0000 Subject: [PATCH 34/38] Remove untested and unused management command --- .../management/commands/mark_mails_as_read.py | 57 ------------------- 1 file changed, 57 deletions(-) delete mode 100644 mail/management/commands/mark_mails_as_read.py diff --git a/mail/management/commands/mark_mails_as_read.py b/mail/management/commands/mark_mails_as_read.py deleted file mode 100644 index 7b951e9a..00000000 --- a/mail/management/commands/mark_mails_as_read.py +++ /dev/null @@ -1,57 +0,0 @@ -from django.conf import settings -from django.core.management.base import BaseCommand - -from mail import enums, models -from mail_servers.servers import MailServer -from mailboxes.utils import get_message_header, get_message_id - - -class Command(BaseCommand): - def add_arguments(self, parser): - parser.add_argument( - "--mailbox", type=str, nargs="?", help="Email address from which the mails are to be read and mark as read" - ) - parser.add_argument("--password", type=str, nargs="?", help="Password for the mailbox") - parser.add_argument("--dry_run", type=str, nargs="?", help="Is it a test run", default="True") - - def handle(self, *args, **options): - email_user = options.pop("mailbox") - email_password = options.pop("password") - dry_run = options.pop("dry_run") - - server = MailServer( - hostname=settings.EMAIL_HOSTNAME, - user=email_user, - password=email_password, - pop3_port=995, - smtp_port=587, - ) - - with server.connect_to_pop3() as pop3_connection: - self.stdout.write(self.style.SUCCESS(f"Connected to {email_user}")) - - _, mails, _ = pop3_connection.list() - self.stdout.write(self.style.SUCCESS(f"Found {len(mails)} in the inbox")) - - mail_message_ids = [ - get_message_id(*get_message_header(pop3_connection, m.decode(settings.DEFAULT_ENCODING))) for m in mails - ] - self.stdout.write( - self.style.SUCCESS(f"List of Message-Id and message numbers for existing mails:\n{mail_message_ids}") - ) - - if dry_run.lower() == "false": - mailbox_config, _ = models.MailboxConfig.objects.get_or_create(username=email_user) - - for message_id, message_num in mail_message_ids: - if message_id is None: - continue - - read_status, _ = models.MailReadStatus.objects.get_or_create( - message_id=message_id, - message_num=message_num, - mailbox=mailbox_config, - ) - read_status.status = enums.MailReadStatuses.READ - read_status.save() - self.stdout.write(self.style.SUCCESS(f"Message-Id {message_id} marked as Read")) From 3fce8fbc818d4b12be27adcb33a7d04a23787b97 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 31 Dec 2024 11:22:29 +0000 Subject: [PATCH 35/38] Add tests for find_mail_of --- mail/tests/libraries/test_mailbox_service.py | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 mail/tests/libraries/test_mailbox_service.py diff --git a/mail/tests/libraries/test_mailbox_service.py b/mail/tests/libraries/test_mailbox_service.py new file mode 100644 index 00000000..6fd4ec2b --- /dev/null +++ b/mail/tests/libraries/test_mailbox_service.py @@ -0,0 +1,27 @@ +from django.test import TestCase + +from mail.enums import ExtractTypeEnum, ReceptionStatusEnum +from mail.libraries.mailbox_service import find_mail_of +from mail.tests.factories import MailFactory + + +class FindMailOfTests(TestCase): + def test_finding_mail(self): + mail = MailFactory(status=ReceptionStatusEnum.REPLY_SENT, extract_type=ExtractTypeEnum.LICENCE_DATA) + + found_mail = find_mail_of( + [ExtractTypeEnum.LICENCE_DATA, ExtractTypeEnum.LICENCE_REPLY], + ReceptionStatusEnum.REPLY_SENT, + ) + + self.assertEqual(mail, found_mail) + + def test_mail_not_found(self): + MailFactory(status=ReceptionStatusEnum.REPLY_SENT, extract_type=ExtractTypeEnum.LICENCE_DATA) + + found_mail = find_mail_of( + [ExtractTypeEnum.USAGE_DATA, ExtractTypeEnum.USAGE_REPLY], + ReceptionStatusEnum.REPLY_SENT, + ) + + self.assertIsNone(found_mail) From efb1856fd5eca9c506255be2bba942f374a38d53 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 31 Dec 2024 12:08:44 +0000 Subject: [PATCH 36/38] Swap out mocking mailservers for overriding settings --- mail/tests/test_end_to_end.py | 57 +++++++++++++---------------------- 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/mail/tests/test_end_to_end.py b/mail/tests/test_end_to_end.py index f73ec05b..3f2fc294 100644 --- a/mail/tests/test_end_to_end.py +++ b/mail/tests/test_end_to_end.py @@ -14,8 +14,6 @@ from mail.enums import ExtractTypeEnum, ReceptionStatusEnum, SourceEnum from mail.libraries.helpers import read_file from mail.models import LicenceData, LicencePayload, Mail, UsageData -from mail_servers.auth import BasicAuthentication -from mail_servers.servers import MailServer pytestmark = pytest.mark.django_db @@ -42,6 +40,27 @@ def set_settings(settings, outgoing_email_user): settings.LITE_API_URL = "https://lite.example.com" + settings.MAIL_SERVERS = { + "spire_to_dit": { + "HOSTNAME": "spire-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "spire-to-dit-user", + "password": "password", + }, + }, + "hmrc_to_dit": { + "HOSTNAME": "hmrc-to-dit-mailserver", + "POP3_PORT": 1110, + "AUTHENTICATION_CLASS": "mail_servers.auth.BasicAuthentication", + "AUTHENTICATION_OPTIONS": { + "user": "hmrc-to-dit-user", + "password": "password", + }, + }, + } + @pytest.fixture() def hmrc_to_dit_mailserver_api_url(): @@ -100,40 +119,6 @@ def usage_data_file_body(usage_data_file_name): return read_file(f"mail/tests/files/end_to_end/{usage_data_file_name}", mode="rb") -@pytest.fixture(autouse=True) -def hmrc_to_dit_mailserver(mocker): - auth = BasicAuthentication( - user="hmrc-to-dit-user", - password="password", - ) - hmrc_to_dit_mailserver = MailServer( - auth, - hostname="hmrc-to-dit-mailserver", - pop3_port=1110, - ) - mocker.patch( - "mail.libraries.routing_controller.get_hmrc_to_dit_mailserver", - return_value=hmrc_to_dit_mailserver, - ) - - -@pytest.fixture(autouse=True) -def spire_to_dit_mailserver(mocker): - auth = BasicAuthentication( - user="spire-to-dit-user", - password="password", - ) - spire_to_dit_mailserver = MailServer( - auth, - hostname="spire-to-dit-mailserver", - pop3_port=1110, - ) - mocker.patch( - "mail.libraries.routing_controller.get_spire_to_dit_mailserver", - return_value=spire_to_dit_mailserver, - ) - - def normalise_line_endings(string): return string.replace("\r", "").strip() From 6b2ac82cd45535c2ea20527427b98d40ee64c0a2 Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 7 Jan 2025 11:51:50 +0000 Subject: [PATCH 37/38] Add comment for update_or_create on read statuses --- mailboxes/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 9676647b..8c5300a6 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -168,6 +168,11 @@ def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, logger.debug( "About to create or update mail_read_status for %s (%s)", message.message_id, message.message_number ) + + # The `mail_data` really shouldn't be changing here so the `update` part seems redundant. + # However, even if we read an email that we've read previously we get back some header information that + # does change on every read e.g. a request id and some timestamps. + # Given that the data is changing I chose to allow it to update each time to save the latest headers. read_status, created = mailbox_config.mail_read_statuses.update_or_create( message_id=message.message_id, message_num=message.message_number, From 8135a5d97d0232ac379d5aee2c00ab0e03c705de Mon Sep 17 00:00:00 2001 From: Kevin Carrogan Date: Tue, 7 Jan 2025 16:04:42 +0000 Subject: [PATCH 38/38] Add code comments and annotations to mailbox utils --- mail_servers/servers.py | 3 +- mailboxes/tests/test_utils.py | 6 +- mailboxes/utils.py | 127 ++++++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 42 deletions(-) diff --git a/mail_servers/servers.py b/mail_servers/servers.py index b7d451a1..c4169e81 100644 --- a/mail_servers/servers.py +++ b/mail_servers/servers.py @@ -1,6 +1,7 @@ import logging import poplib import smtplib +from collections.abc import Iterator from contextlib import contextmanager from django.conf import settings @@ -28,7 +29,7 @@ def __eq__(self, other): return self.hostname == other.hostname and self.auth == other.auth and self.pop3_port == other.pop3_port @contextmanager - def connect_to_pop3(self): + def connect_to_pop3(self) -> Iterator[poplib.POP3_SSL]: logger.info("Establishing a pop3 connection to %s:%s", self.hostname, self.pop3_port) pop3_connection = poplib.POP3_SSL(self.hostname, self.pop3_port, timeout=60) logger.info("Pop3 connection established") diff --git a/mailboxes/tests/test_utils.py b/mailboxes/tests/test_utils.py index 1d06e04b..f40f79e4 100644 --- a/mailboxes/tests/test_utils.py +++ b/mailboxes/tests/test_utils.py @@ -42,11 +42,7 @@ def test_get_message_header(self): class GetMessageIdTests(SimpleTestCase): - @override_settings( - SPIRE_FROM_ADDRESS="spire.from.address@example.com", # /PS-IGNORE - HMRC_TO_DIT_REPLY_ADDRESS="hmrc.to.dit.reply.address@example.com", # /PS-IGNORE - ) - def test_get_message_id_not_from_valid_email(self): + def test_get_message_id(self): message = Message() message_id_header = Header("<123456@example.com>") # /PS-IGNORE message["Message-ID"] = message_id_header diff --git a/mailboxes/utils.py b/mailboxes/utils.py index 8c5300a6..a028ba8b 100644 --- a/mailboxes/utils.py +++ b/mailboxes/utils.py @@ -1,8 +1,10 @@ import logging +from collections.abc import Iterator from email.headerregistry import Address +from email.message import Message from email.parser import BytesHeaderParser from email.utils import parseaddr -from poplib import error_proto +from poplib import POP3_SSL, error_proto from typing import Callable, Iterator, Tuple from django.conf import settings @@ -12,12 +14,13 @@ from mail.libraries.helpers import to_mail_message_dto from mail_servers.servers import MailServer from mailboxes.enums import MailReadStatuses -from mailboxes.models import MailboxConfig +from mailboxes.models import MailboxConfig, MailReadStatus logger = logging.getLogger(__name__) -def get_message_header(pop3_connection, msg_num): +def get_message_header(pop3_connection: POP3_SSL, msg_num: str) -> Message: + """Retrieves the message header for the message number of the pop3 mailbox""" # retrieves the header information # 0 indicates the number of lines of message to be retrieved after the header _, msg_header, _ = pop3_connection.top(msg_num, 0) @@ -28,19 +31,29 @@ def get_message_header(pop3_connection, msg_num): return header -def get_message_id(msg_header): +def get_message_id(msg_header: Message) -> str: + """Extract the message-id from the given message header and returns the + username part of the message-id + + Given the message-id `XXXX <123456@example.com>` this would return `123456`. + """ _, message_id = parseaddr(str(msg_header["message-id"])) message_id = Address(addr_spec=message_id).username return message_id -def get_message_number(listing_message): +def get_message_number(listing_message: bytes) -> str: + """Extracts the message number from a line of the response `POP3.list`. + + Given the response `22 12345` it would return `22`. + """ listing_msg = listing_message.decode(settings.DEFAULT_ENCODING) msg_num, _, _ = listing_msg.partition(" ") return msg_num -def get_read_messages(mailbox_config): +def get_read_messages(mailbox_config: MailboxConfig) -> set[str]: + """Retrives a set of all of the already read message_ids in a given mailbox.""" mail_read_statuses = mailbox_config.mail_read_statuses.filter( status__in=[ MailReadStatuses.READ, @@ -51,7 +64,45 @@ def get_read_messages(mailbox_config): return read_message_ids -def is_from_valid_sender(message, valid_addresses): +class MailboxMessage: + """This is a wrapper for the various bits of data we retrieve about messages + from a mailbox. + + It ties together the various bits of data about the message instead of having + to call various functions and pass in the same data over and over again. + """ + + def __init__( + self, + pop3_connection: POP3_SSL, + mailbox_config: MailboxConfig, + message_number: str, + ): + self.pop3_connection = pop3_connection + self.mailbox_config = mailbox_config + self.message_number = message_number + + @cached_property + def message_header(self) -> Message: + return get_message_header(self.pop3_connection, self.message_number) + + @cached_property + def message_id(self) -> str: + message_id = get_message_id(self.message_header) + logger.info("Extracted Message-Id as %s for the message_num %s", message_id, self.message_number) + return message_id + + @cached_property + def mail_data(self) -> tuple[bytes, list[bytes], int]: + return self.pop3_connection.retr(self.message_number) + + @cached_property + def binary_data(self) -> bytes: + return b"\n".join(self.mail_data[1]) + + +def is_from_valid_sender(message: MailboxMessage, valid_addresses: list[str]) -> bool: + """Checks whether an email message is one of the valid addresses supplied.""" _, from_address = parseaddr(str(message.message_header["From"])) logger.info("Found from address %s", from_address) valid_addresses = [address.replace("From: ", "") for address in valid_addresses] @@ -60,11 +111,17 @@ def is_from_valid_sender(message, valid_addresses): class MarkStatus: - def __init__(self, message, read_status): + """This is a class that is to be used as a function call and acts like + a callback so that a specific MailReadStatus object can be updated without + the caller having to get to the object itself. + """ + + def __init__(self, message: MailboxMessage, read_status: MailReadStatus): self.message = message self.read_status = read_status - def __call__(self, status): + def __call__(self, status: MailReadStatuses): + """Sets the status of the captured MailReadStatus object.""" logger.info( "Marking message_id %s with message_num %s from %r as %s", self.message.message_id, @@ -76,32 +133,15 @@ def __call__(self, status): self.read_status.save() -class MailboxMessage: - def __init__(self, pop3_connection, mailbox_config, message_number): - self.pop3_connection = pop3_connection - self.mailbox_config = mailbox_config - self.message_number = message_number - - @cached_property - def message_header(self): - return get_message_header(self.pop3_connection, self.message_number) - - @cached_property - def message_id(self): - message_id = get_message_id(self.message_header) - logger.info("Extracted Message-Id as %s for the message_num %s", message_id, self.message_number) - return message_id - - @cached_property - def mail_data(self): - return self.pop3_connection.retr(self.message_number) +def get_messages( + pop3_connection: POP3_SSL, + mailbox_config: MailboxConfig, + max_limit: int, +) -> Iterator[MailboxMessage]: + """Returns an iterator of objects containing information from the given mailbox. - @cached_property - def binary_data(self): - return b"\n".join(self.mail_data[1]) - - -def get_messages(pop3_connection, mailbox_config, max_limit): + The number of which is limited by `max_limit`. + """ _, mails, _ = pop3_connection.list() logger.debug(mails) mails = mails[-max_limit:] @@ -111,11 +151,15 @@ def get_messages(pop3_connection, mailbox_config, max_limit): yield message -def is_read(message, read_messages): +def is_read(message: MailboxMessage, read_messages: set[str]) -> bool: + """Given a set of already read message id numbers this will return whether + the passed in message is part of that group. + """ return message.message_id in read_messages -def get_message_dto(message): +def get_message_dto(message: MailboxMessage): + """Returns the mail DTO object for the given mailbox message""" mail_data = message.mail_data logger.info( "Retrieved message_id %s with message_num %s from %s", @@ -130,6 +174,17 @@ def get_message_dto(message): def get_message_iterator(server: MailServer) -> Iterator[Tuple[EmailMessageDto, Callable]]: + """Returns all of the unread message DTOs for the mailbox associated to the + mail server. + + The DTO is paired with a callback that allows the status of the mail to be + marked with a given status. + + This also does a check to ensure that only valid emails from known senders + are returned. + + When a mail is read it will store that mail data in the MailReadStatus. + """ mailbox_config, _ = MailboxConfig.objects.get_or_create(username=server.user) read_messages = get_read_messages(mailbox_config)