Skip to content

Commit

Permalink
gt: refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
AlyaGomaa committed Nov 13, 2024
1 parent 1af1da4 commit d4098fd
Showing 1 changed file with 13 additions and 13 deletions.
26 changes: 13 additions & 13 deletions parsers/ground_truth.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import sys
import traceback
from pprint import pp

import utils.timestamp_handler
from typing import (
Expand Down Expand Up @@ -160,14 +161,16 @@ def extract_label_from_line(self, line:str) -> str:
:param line: a zeek tab separated line
:return: malicious, benign, background or unknown
"""
pattern = r"Malicious[\s\t]+"
if findall(pattern, line):
return 'malicious'

pattern = r"Benign[\s\t]+"
if findall(pattern, line):
return 'benign'
patterns = {
r"Malicious[\s\t]+": 'malicious',
r"Benign[\s\t]+": 'benign',
r"Background[\s\t]+": 'background',
}

for pattern, label in patterns.items():
if findall(pattern, line):
return label

return 'unknown'

def update_labels_ctr(self, label: str):
Expand Down Expand Up @@ -255,9 +258,8 @@ def extract_fields(self, line: str) -> Tuple[Union[bool,dict], str]:
return False, "Invalid flow"

try:
if flow[0] == "unknown":
label = "background" if "Background" in line else ""
return False, f"Unsupported flow label {label}"
if flow[0] in ("unknown", "background"):
return False, f"Unsupported flow label {flow[0]}"

return {
'label': flow[0],
Expand Down Expand Up @@ -391,15 +393,13 @@ def parse_file(self, filename: str):
line_number = 0
while line := gt_file.readline():
line_number += 1

# skip comments
if line.startswith('#'):
continue

flow, err = self.extract_fields(line)
if not flow:
# self.log(f"{err}. Skipping flow at line: ",
# line_number,
# error=True)
continue

tw_registration_stats: dict = self.register_timewindow(
Expand Down

0 comments on commit d4098fd

Please sign in to comment.