From ee612c6c21e20806d223f29d993517636686f0b9 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 9 Feb 2022 21:45:27 -0500 Subject: [PATCH 1/8] provide current user (from web request) to populate `created_by` for manual submissions using the pipeline create a generic user --- src/tram/tram/management/commands/pipeline.py | 9 +++++++-- src/tram/tram/ml/base.py | 1 + src/tram/tram/models.py | 7 ++++--- src/tram/tram/serializers.py | 2 +- src/tram/tram/views.py | 4 ++-- 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/tram/tram/management/commands/pipeline.py b/src/tram/tram/management/commands/pipeline.py index 2cffc3b019..67874c68fa 100644 --- a/src/tram/tram/management/commands/pipeline.py +++ b/src/tram/tram/management/commands/pipeline.py @@ -1,6 +1,7 @@ import json import time +from django.contrib.auth.models import User from django.core.files import File from django.core.management.base import BaseCommand @@ -50,11 +51,15 @@ def add_arguments(self, parser): def handle(self, *args, **options): subcommand = options["subcommand"] + user, created = User.objects.get_or_create(username="pipeline (manual)") + if created: + self.stdout.write(f"Created User '{user.username}' to handle manual submissions") + if subcommand == ADD: filepath = options["file"] with open(filepath, "rb") as f: django_file = File(f) - db_models.DocumentProcessingJob.create_from_file(django_file) + db_models.DocumentProcessingJob.create_from_file(django_file, user) self.stdout.write(f"Added file to ML Pipeline: {filepath}") return @@ -64,7 +69,7 @@ def handle(self, *args, **options): with open(filepath, "r") as f: res = serializers.ReportExportSerializer(data=json.load(f)) res.is_valid(raise_exception=True) - res.save() + res.save(created_by=user) return model = options["model"] diff --git a/src/tram/tram/ml/base.py b/src/tram/tram/ml/base.py index 080b4d8ecd..4f1f8f2458 100644 --- a/src/tram/tram/ml/base.py +++ b/src/tram/tram/ml/base.py @@ -351,6 +351,7 @@ def _save_report(self, report, document): document=document, text=report.text, ml_model=self.model.__class__.__name__, + created_by=document.created_by, ) rpt.save() diff --git a/src/tram/tram/models.py b/src/tram/tram/models.py index 60367ad9a7..238dbfefc4 100644 --- a/src/tram/tram/models.py +++ b/src/tram/tram/models.py @@ -88,11 +88,12 @@ class DocumentProcessingJob(models.Model): updated_on = models.DateTimeField(auto_now=True) @classmethod - def create_from_file(cls, f): + def create_from_file(cls, f, u): assert isinstance(f, File) - doc = Document(docfile=f) + assert isinstance(u, User) + doc = Document(docfile=f, created_by=u) doc.save() - dpj = DocumentProcessingJob(document=doc) + dpj = DocumentProcessingJob(document=doc, created_by=u) dpj.save() return dpj diff --git a/src/tram/tram/serializers.py b/src/tram/tram/serializers.py index f17e73bdb5..0ecf23579a 100644 --- a/src/tram/tram/serializers.py +++ b/src/tram/tram/serializers.py @@ -197,7 +197,7 @@ def create(self, validated_data): document=None, text=validated_data["text"], ml_model=validated_data["ml_model"], - created_by=None, # TODO: Get user from session + created_by=validated_data["created_by"], ) for sentence in validated_data["sentences"]: diff --git a/src/tram/tram/views.py b/src/tram/tram/views.py index eff5d995c6..8e657228ca 100644 --- a/src/tram/tram/views.py +++ b/src/tram/tram/views.py @@ -99,13 +99,13 @@ def upload(request): "text/html", # .html files "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx files ): - DocumentProcessingJob.create_from_file(request.FILES["file"]) + DocumentProcessingJob.create_from_file(request.FILES["file"], request.user) elif file_content_type in ("application/json",): # .json files json_data = json.loads(request.FILES["file"].read()) res = serializers.ReportExportSerializer(data=json_data) if res.is_valid(): - res.save() + res.save(created_by=request.user) else: return HttpResponseBadRequest(res.errors) else: From 6581ef5efc52140e70fa23a5c4a4d62adbb58ab2 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 10 Feb 2022 07:43:37 -0500 Subject: [PATCH 2/8] formatting lines --- src/tram/tram/management/commands/pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/tram/tram/management/commands/pipeline.py b/src/tram/tram/management/commands/pipeline.py index 67874c68fa..634c4cc919 100644 --- a/src/tram/tram/management/commands/pipeline.py +++ b/src/tram/tram/management/commands/pipeline.py @@ -53,7 +53,9 @@ def handle(self, *args, **options): user, created = User.objects.get_or_create(username="pipeline (manual)") if created: - self.stdout.write(f"Created User '{user.username}' to handle manual submissions") + self.stdout.write( + f"Created User '{user.username}' to handle manual submissions" + ) if subcommand == ADD: filepath = options["file"] From 1e0032dbe8d899c97f18155fdfa5993f3e6f8316 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 10 Feb 2022 07:45:29 -0500 Subject: [PATCH 3/8] revert file to old style triggers warnings every time you run it --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 2c69a1e328..58cd146299 100644 --- a/tox.ini +++ b/tox.ini @@ -12,9 +12,9 @@ passenv = GITHUB_* [testenv:tram] description = Run Pytest commands = - python -m nltk.downloader punkt - python -m nltk.downloader wordnet - python -m nltk.downloader omw-1.4 + python -c "import nltk; nltk.download('punkt')" + python -c "import nltk; nltk.download('wordnet')" + python -c "import nltk; nltk.download('omw-1.4')" pytest --cov=src/ --cov=src/tram --cov=src/tram/tram/ml --cov=src/tram/tram/management/commands --cov-report=xml [testenv:bandit] From 3fb530a5238179b265ddbc79c4728487887d803e Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 10 Feb 2022 08:00:28 -0500 Subject: [PATCH 4/8] add missing pieces to tests, update ReportExportSerializer fields --- src/tram/tram/serializers.py | 4 ++-- tests/tram/test_base.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/tram/tram/serializers.py b/src/tram/tram/serializers.py index 0ecf23579a..66a1e34bcb 100644 --- a/src/tram/tram/serializers.py +++ b/src/tram/tram/serializers.py @@ -194,10 +194,10 @@ def create(self, validated_data): with transaction.atomic(): report = db_models.Report.objects.create( name=validated_data["name"], - document=None, + document=validated_data.get("document"), text=validated_data["text"], ml_model=validated_data["ml_model"], - created_by=validated_data["created_by"], + created_by=validated_data.get("created_by"), ) for sentence in validated_data["sentences"]: diff --git a/tests/tram/test_base.py b/tests/tram/test_base.py index 48a0e5fb70..595d8b26a4 100644 --- a/tests/tram/test_base.py +++ b/tests/tram/test_base.py @@ -1,5 +1,6 @@ import pytest from constance import config +from django.contrib.auth.models import User from django.core.files import File import tram.models as db_models @@ -319,8 +320,9 @@ def test_process_job_handles_image_based_pdf(self): """ # Arrange image_pdf = "tests/data/GroupIB_Big_Airline_Heist_APT41.pdf" + dummy_user = User.objects.get_or_create(username="dummy-user")[0] with open(image_pdf, "rb") as f: - processing_job = db_models.DocumentProcessingJob.create_from_file(File(f)) + processing_job = db_models.DocumentProcessingJob.create_from_file(File(f), dummy_user) job_id = processing_job.id model_manager = base.ModelManager("dummy") From 12976568e6bc53bf4f6797244a148f77baf34186 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 10 Feb 2022 08:14:16 -0500 Subject: [PATCH 5/8] add test user fixture to test --- tests/tram/test_base.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/tram/test_base.py b/tests/tram/test_base.py index 595d8b26a4..016b415ee1 100644 --- a/tests/tram/test_base.py +++ b/tests/tram/test_base.py @@ -12,6 +12,15 @@ def dummy_model(): return base.DummyModel() +@pytest.fixture +def user(): + user = User.objects.create_superuser(username="testuser") + user.set_password("12345") + user.save() + yield user + user.delete() + + class TestSentence: def test_sentence_stores_no_mapping(self): # Arrange @@ -310,7 +319,7 @@ def test_process_job_produces_valid_report(self): assert report.text is not None assert len(report.sentences) > 0 - def test_process_job_handles_image_based_pdf(self): + def test_process_job_handles_image_based_pdf(self, user): """ Some PDFs can be saved such that the text is stored as images and therefore cannot be extracted from the PDF. Windows PDF Printer behaves this way. @@ -320,9 +329,8 @@ def test_process_job_handles_image_based_pdf(self): """ # Arrange image_pdf = "tests/data/GroupIB_Big_Airline_Heist_APT41.pdf" - dummy_user = User.objects.get_or_create(username="dummy-user")[0] with open(image_pdf, "rb") as f: - processing_job = db_models.DocumentProcessingJob.create_from_file(File(f), dummy_user) + processing_job = db_models.DocumentProcessingJob.create_from_file(File(f), user) job_id = processing_job.id model_manager = base.ModelManager("dummy") From 32da34ffaadd7b5d108b1a54576389a35bd86615 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 10 Feb 2022 09:05:10 -0500 Subject: [PATCH 6/8] update tests --- tests/tram/test_base.py | 4 +++- tests/tram/test_commands.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/tram/test_base.py b/tests/tram/test_base.py index 016b415ee1..c93483336a 100644 --- a/tests/tram/test_base.py +++ b/tests/tram/test_base.py @@ -330,7 +330,9 @@ def test_process_job_handles_image_based_pdf(self, user): # Arrange image_pdf = "tests/data/GroupIB_Big_Airline_Heist_APT41.pdf" with open(image_pdf, "rb") as f: - processing_job = db_models.DocumentProcessingJob.create_from_file(File(f), user) + processing_job = db_models.DocumentProcessingJob.create_from_file( + File(f), user + ) job_id = processing_job.id model_manager = base.ModelManager("dummy") diff --git a/tests/tram/test_commands.py b/tests/tram/test_commands.py index 21cf344c31..d819dc83e3 100644 --- a/tests/tram/test_commands.py +++ b/tests/tram/test_commands.py @@ -7,6 +7,7 @@ from tram.models import AttackObject, Sentence +@pytest.mark.django_db class TestPipeline: def test_add_calls_create_from_file(self, mocker): # Arrange From 010b9b157a1d96129cb58784a1bc0a24eb2ed2ba Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Tue, 15 Feb 2022 10:04:09 -0500 Subject: [PATCH 7/8] create docstrings for `create_from_file` --- src/tram/tram/models.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/tram/tram/models.py b/src/tram/tram/models.py index 238dbfefc4..219ba9890f 100644 --- a/src/tram/tram/models.py +++ b/src/tram/tram/models.py @@ -89,6 +89,14 @@ class DocumentProcessingJob(models.Model): @classmethod def create_from_file(cls, f, u): + """ + Creates a document processing job for the ML pipeline based on a file + submission by an authenticated user. + + :param f: An instance of django.core.files.File + :param u: An instance of django.contrib.auth.models.User + :return: An instance of tram.models.DocumentProcessingJob + """ assert isinstance(f, File) assert isinstance(u, User) doc = Document(docfile=f, created_by=u) From 953419b72805dd54fd95bb8c7f7057d21eb1fabc Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Tue, 15 Feb 2022 10:13:52 -0500 Subject: [PATCH 8/8] move `self.stdout.write` call to `logger.info` --- src/tram/tram/management/commands/pipeline.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/tram/tram/management/commands/pipeline.py b/src/tram/tram/management/commands/pipeline.py index ec97cbef6b..4b8d40a7b3 100644 --- a/src/tram/tram/management/commands/pipeline.py +++ b/src/tram/tram/management/commands/pipeline.py @@ -55,9 +55,7 @@ def handle(self, *args, **options): user, created = User.objects.get_or_create(username="pipeline (manual)") if created: - self.stdout.write( - f"Created User '{user.username}' to handle manual submissions" - ) + logger.info(f"Created User '{user.username}' to handle manual submissions") if subcommand == ADD: filepath = options["file"]