Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CTID-18] Provide user information for uploads #147

Merged
merged 14 commits into from
Feb 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/tram/tram/management/commands/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import time

from django.contrib.auth.models import User
from django.core.files import File
from django.core.management.base import BaseCommand

Expand Down Expand Up @@ -52,11 +53,15 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
subcommand = options["subcommand"]

user, created = User.objects.get_or_create(username="pipeline (manual)")
if created:
logger.info(f"Created User '{user.username}' to handle manual submissions")

if subcommand == ADD:
filepath = options["file"]
with open(filepath, "rb") as f:
django_file = File(f)
db_models.DocumentProcessingJob.create_from_file(django_file)
db_models.DocumentProcessingJob.create_from_file(django_file, user)
logger.info("Added file to ML Pipeline: %s", filepath)
return

Expand All @@ -66,7 +71,7 @@ def handle(self, *args, **options):
with open(filepath, "r") as f:
res = serializers.ReportExportSerializer(data=json.load(f))
res.is_valid(raise_exception=True)
res.save()
res.save(created_by=user)
return

model = options["model"]
Expand Down
1 change: 1 addition & 0 deletions src/tram/tram/ml/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ def _save_report(self, report, document):
document=document,
text=report.text,
ml_model=self.model.__class__.__name__,
created_by=document.created_by,
)
rpt.save()

Expand Down
15 changes: 12 additions & 3 deletions src/tram/tram/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,20 @@ class DocumentProcessingJob(models.Model):
updated_on = models.DateTimeField(auto_now=True)

@classmethod
def create_from_file(cls, f):
def create_from_file(cls, f, u):
mehaase marked this conversation as resolved.
Show resolved Hide resolved
"""
Creates a document processing job for the ML pipeline based on a file
submission by an authenticated user.

:param f: An instance of django.core.files.File
:param u: An instance of django.contrib.auth.models.User
:return: An instance of tram.models.DocumentProcessingJob
"""
assert isinstance(f, File)
doc = Document(docfile=f)
assert isinstance(u, User)
doc = Document(docfile=f, created_by=u)
doc.save()
dpj = DocumentProcessingJob(document=doc)
dpj = DocumentProcessingJob(document=doc, created_by=u)
dpj.save()
return dpj

Expand Down
4 changes: 2 additions & 2 deletions src/tram/tram/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ def create(self, validated_data):
with transaction.atomic():
report = db_models.Report.objects.create(
name=validated_data["name"],
document=None,
document=validated_data.get("document"),
text=validated_data["text"],
ml_model=validated_data["ml_model"],
created_by=None, # TODO: Get user from session
created_by=validated_data.get("created_by"),
)

for sentence in validated_data["sentences"]:
Expand Down
4 changes: 2 additions & 2 deletions src/tram/tram/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,13 @@ def upload(request):
"application/vnd.openxmlformats-officedocument.wordprocessingml.document", # .docx files
"text/plain", # .txt files
):
DocumentProcessingJob.create_from_file(request.FILES["file"])
DocumentProcessingJob.create_from_file(request.FILES["file"], request.user)
elif file_content_type in ("application/json",): # .json files
json_data = json.loads(request.FILES["file"].read())
res = serializers.ReportExportSerializer(data=json_data)

if res.is_valid():
res.save()
res.save(created_by=request.user)
else:
return HttpResponseBadRequest(res.errors)
else:
Expand Down
16 changes: 14 additions & 2 deletions tests/tram/test_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest
from constance import config
from django.contrib.auth.models import User
from django.core.files import File

import tram.models as db_models
Expand All @@ -11,6 +12,15 @@ def dummy_model():
return base.DummyModel()


@pytest.fixture
def user():
    """
    Provide a throwaway superuser for tests that need a document owner.

    The account is created with the username "testuser", given the password
    "12345", handed to the requesting test, and deleted afterwards.
    """
    account = User.objects.create_superuser(username="testuser")
    account.set_password("12345")
    account.save()
    # Everything after the yield is teardown for the requesting test.
    yield account
    account.delete()


class TestSentence:
def test_sentence_stores_no_mapping(self):
# Arrange
Expand Down Expand Up @@ -309,7 +319,7 @@ def test_process_job_produces_valid_report(self):
assert report.text is not None
assert len(report.sentences) > 0

def test_process_job_handles_image_based_pdf(self):
def test_process_job_handles_image_based_pdf(self, user):
"""
Some PDFs can be saved such that the text is stored as images and therefore
cannot be extracted from the PDF. Windows PDF Printer behaves this way.
Expand All @@ -320,7 +330,9 @@ def test_process_job_handles_image_based_pdf(self):
# Arrange
image_pdf = "tests/data/GroupIB_Big_Airline_Heist_APT41.pdf"
with open(image_pdf, "rb") as f:
processing_job = db_models.DocumentProcessingJob.create_from_file(File(f))
processing_job = db_models.DocumentProcessingJob.create_from_file(
File(f), user
)
job_id = processing_job.id
model_manager = base.ModelManager("dummy")

Expand Down
1 change: 1 addition & 0 deletions tests/tram/test_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from tram.models import AttackObject, Sentence


@pytest.mark.django_db
class TestPipeline:
def test_add_calls_create_from_file(self, mocker):
# Arrange
Expand Down
6 changes: 3 additions & 3 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ passenv = GITHUB_*
[testenv:tram]
description = Run Pytest
commands =
python -m nltk.downloader punkt
python -m nltk.downloader wordnet
python -m nltk.downloader omw-1.4
python -c "import nltk; nltk.download('punkt')"
python -c "import nltk; nltk.download('wordnet')"
python -c "import nltk; nltk.download('omw-1.4')"
pytest --cov=src/ --cov=src/tram --cov=src/tram/tram/ml --cov=src/tram/tram/management/commands --cov-report=xml

[testenv:bandit]
Expand Down