Skip to content

Commit

Permalink
Merge pull request #153 from center-for-threat-informed-defense/setup…
Browse files Browse the repository at this point in the history
…-logging

refactor(logs): Set up Django logging to replace existing print()'s
  • Loading branch information
mehaase authored Feb 15, 2022
2 parents 36ba0b0 + 8dff25e commit 748b518
Show file tree
Hide file tree
Showing 5 changed files with 60 additions and 18 deletions.
5 changes: 4 additions & 1 deletion src/scripts/reformat_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"""

import json
import logging
import os
import sys
from datetime import datetime
Expand All @@ -43,6 +44,8 @@
from tram.serializers import ReportExportSerializer # noqa: E402

outfile = "data/training/bootstrap-training-data.json"
logger = logging.getLogger(__name__)


ATTACK_LOOKUP = { # A mapping of attack descriptions to technique IDs
"drive-by compromise": "T1189",
Expand Down Expand Up @@ -289,7 +292,7 @@ def main():
with open(outfile, "w") as f:
json.dump(res.initial_data, f, indent=4)

print("Wrote data to %s" % outfile)
logger.info("Wrote data to %s" % outfile)


if __name__ == "__main__":
Expand Down
10 changes: 6 additions & 4 deletions src/tram/tram/management/commands/attackdata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging

from django.conf import settings
from django.core.management.base import BaseCommand
Expand All @@ -7,6 +8,7 @@

LOAD = "load"
CLEAR = "clear"
logger = logging.getLogger(__name__)


STIX_TYPE_TO_ATTACK_TYPE = {
Expand Down Expand Up @@ -35,7 +37,7 @@ def add_arguments(self, parser):

def clear_attack_data(self):
deleted = AttackObject.objects.all().delete()
print(f"Deleted {deleted[0]} Attack objects")
logger.info("Deleted %d Attack objects", deleted[0])

def create_attack_object(self, obj):
for external_reference in obj["external_references"]:
Expand Down Expand Up @@ -106,11 +108,11 @@ def load_attack_data(self, filepath):
except ValueError: # Value error means unsupported object type
skipped_stats[obj_type] = skipped_stats.get(obj_type, 0) + 1

print(f"Load stats for {filepath}:")
logger.info("Load stats for %s:", filepath)
for k, v in created_stats.items():
print(f"\tCreated {v} {k} objects")
logger.info("Created %s %s objects", v, k)
for k, v in skipped_stats.items():
print(f"\tSkipped {v} {k} objects")
logger.info("Skipped %s %s objects", v, k)

def handle(self, *args, **options):
subcommand = options["subcommand"]
Expand Down
12 changes: 7 additions & 5 deletions src/tram/tram/management/commands/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
import time

from django.core.files import File
Expand All @@ -12,6 +13,7 @@
RUN = "run"
TRAIN = "train"
LOAD_TRAINING_DATA = "load-training-data"
logger = logging.getLogger(__name__)


class Command(BaseCommand):
Expand Down Expand Up @@ -55,12 +57,12 @@ def handle(self, *args, **options):
with open(filepath, "rb") as f:
django_file = File(f)
db_models.DocumentProcessingJob.create_from_file(django_file)
self.stdout.write(f"Added file to ML Pipeline: {filepath}")
logger.info("Added file to ML Pipeline: %s", filepath)
return

if subcommand == LOAD_TRAINING_DATA:
filepath = options["file"]
self.stdout.write(f"Loading training data from {filepath}")
logger.info("Loading training data from %s", filepath)
with open(filepath, "r") as f:
res = serializers.ReportExportSerializer(data=json.load(f))
res.is_valid(raise_exception=True)
Expand All @@ -71,13 +73,13 @@ def handle(self, *args, **options):
model_manager = base.ModelManager(model)

if subcommand == RUN:
self.stdout.write(f"Running ML Pipeline with Model: {model}")
logger.info("Running ML Pipeline with Model: %s", model)
return model_manager.run_model(options["run_forever"])
elif subcommand == TRAIN:
self.stdout.write(f"Training ML Model: {model}")
logger.info("Training ML Model: %s", model)
start = time.time()
return_value = model_manager.train_model()
end = time.time()
elapsed = end - start
self.stdout.write(f"Trained ML model in {elapsed} seconds")
logger.info("Trained ML model in %0.3f seconds", elapsed)
return return_value
17 changes: 9 additions & 8 deletions src/tram/tram/ml/base.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import logging
import pathlib
import pickle
import re
import time
import traceback
from abc import ABC, abstractmethod
from datetime import datetime, timezone
from io import BytesIO
Expand All @@ -27,6 +27,8 @@
# The word model is overloaded in this scope, so a prefix is necessary
from tram import models as db_models

logger = logging.getLogger(__name__)


class Sentence(object):
def __init__(self, text, order, mappings):
Expand Down Expand Up @@ -346,10 +348,10 @@ def __init__(self, model):
model_filepath = self.get_model_filepath(model_class)
if path.exists(model_filepath):
self.model = model_class.load_from_file(model_filepath)
print("%s loaded from %s" % (model_class.__name__, model_filepath))
logger.info("%s loaded from %s", model_class.__name__, model_filepath)
else:
self.model = model_class()
print("%s loaded from __init__" % model_class.__name__)
logger.info("%s loaded from __init__", model_class.__name__)

def _save_report(self, report, document):
rpt = db_models.Report(
Expand Down Expand Up @@ -393,19 +395,18 @@ def run_model(self, run_forever=False):
).order_by("created_on")
for job in jobs:
filename = job.document.docfile.name
print("Processing Job #%d: %s" % (job.id, filename))
logger.info("Processing Job #%d: %s", job.id, filename)
try:
report = self.model.process_job(job)
with transaction.atomic():
self._save_report(report, job.document)
job.delete()
print("Created report %s" % report.name)
logger.info("Created report %s", report.name)
except Exception as ex:
job.status = "error"
job.message = str(ex)
job.save()
print(f"Failed to create report for {filename}.")
print(traceback.format_exc())
logger.exception("Failed to create report for %s.", filename)

if not run_forever:
return
Expand All @@ -420,7 +421,7 @@ def train_model(self):
self.model.test()
filepath = self.get_model_filepath(self.model.__class__)
self.model.save_to_file(filepath)
print("Trained model saved to %s" % filepath)
logger.info("Trained model saved to %s" % filepath)
return

@staticmethod
Expand Down
34 changes: 34 additions & 0 deletions src/tram/tram/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,40 @@

DATA_UPLOAD_MAX_NUMBER_FIELDS = None

# Logging configuration in the ``logging.config.dictConfig`` schema (version 1).
# Every configured logger writes through a single stream handler that shares
# one line format: "[timestamp] LEVEL [logger name] message".
LOGGING = {
    "version": 1,
    # Leave loggers created before this configuration is applied enabled.
    "disable_existing_loggers": False,
    "formatters": {
        "tram_formatter": {
            # "style": "{" selects str.format-style placeholders in "format".
            "format": "[{asctime}] {levelname} [{name}] {message}",
            "datefmt": "%Y-%m-%d %H:%M:%S",
            "style": "{",
        },
    },
    "handlers": {
        "tram_handler": {
            "class": "logging.StreamHandler",
            "formatter": "tram_formatter",
        },
    },
    # The root logger is configured with the dedicated top-level "root" key of
    # the dictConfig schema. Listing it as an entry named "root" under
    # "loggers" is not equivalent on every Python version: before Python 3.9
    # that configures an ordinary logger literally named "root" and leaves the
    # real root logger (the fallback for all third-party loggers) untouched.
    "root": {
        "handlers": ["tram_handler"],
        "level": "WARNING",
    },
    "loggers": {
        # "propagate": False keeps these records from also reaching the root
        # logger, which uses the same handler and would print them twice.
        "django": {
            "handlers": ["tram_handler"],
            "level": "INFO",
            "propagate": False,
        },
        "tram": {
            "handlers": ["tram_handler"],
            "level": "INFO",
            "propagate": False,
        },
    },
}

# Application definition

INSTALLED_APPS = [
Expand Down

0 comments on commit 748b518

Please sign in to comment.