From 23baea8a119ebead3a0eb18b42646d1a806fd8a5 Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:01:16 +0300 Subject: [PATCH 01/44] flowalerts.conn: use async timer instead of timerthread to avoid hanging threads in memory --- modules/flowalerts/conn.py | 71 +++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index e8185ef8d..109e5a8cc 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -1,3 +1,4 @@ +import asyncio import contextlib import ipaddress import json @@ -6,7 +7,6 @@ import validators from modules.flowalerts.dns import DNS -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -416,7 +416,9 @@ def check_if_resolution_was_made_by_different_version( pass return False - def check_connection_without_dns_resolution(self, profileid, twid, flow): + async def check_connection_without_dns_resolution( + self, profileid, twid, flow + ): """ Checks if there's a flow to a dstip that has no cached DNS answer """ @@ -458,45 +460,36 @@ def check_connection_without_dns_resolution(self, profileid, twid, flow): if self.db.is_ip_resolved(flow.daddr, 24): return False - if flow.uid not in self.connections_checked_in_conn_dns_timer_thread: - # comes here if we haven't started the timer - # thread for this connection before - # mark this connection as checked - self.connections_checked_in_conn_dns_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - # There is no DNS resolution, but it can be that Slips is - # still reading it from the files. - # To give time to Slips to read all the files and get all the flows - # don't alert a Connection Without DNS until 5 seconds has passed - # in real time from the time of this checking. 
- timer = TimerThread( - 15, self.check_connection_without_dns_resolution, params - ) - timer.start() - else: - # It means we already checked this conn with the Timer process - # (we waited 15 seconds for the dns to arrive after - # the connection was made) - # but still no dns resolution for it. - # Sometimes the same computer makes requests using - # its ipv4 and ipv6 address, check if this is the case - if self.check_if_resolution_was_made_by_different_version( - profileid, flow.daddr - ): - return False + # There is no DNS resolution, but it can be that Slips is + # still reading it from the files. + # To give time to Slips to read all the files and get all the flows + # don't alert a Connection Without DNS until 15 seconds has passed + # in real time from the time of this checking. + await asyncio.sleep(15) + if self.db.is_ip_resolved(flow.daddr, 24): + return False - if self.is_well_known_org(flow.daddr): - # if the SNI or rDNS of the IP matches a - # well-known org, then this is a FP - return False + # Reaching here means we already waited 15 seconds for the dns + # to arrive after the connection was made, but still no dns + # resolution for it. 
- self.set_evidence.conn_without_dns(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_conn_dns_timer_thread.remove( - flow.uid - ) + # Sometimes the same computer makes requests using + # its ipv4 and ipv6 address, check if this is the case + if self.check_if_resolution_was_made_by_different_version( + profileid, flow.daddr + ): + return False + + if self.is_well_known_org(flow.daddr): + # if the SNI or rDNS of the IP matches a + # well-known org, then this is a FP + return False + + self.set_evidence.conn_without_dns(twid, flow) + # This UID will never appear again, so we can remove it and + # free some memory + with contextlib.suppress(ValueError): + self.connections_checked_in_conn_dns_timer_thread.remove(flow.uid) def check_conn_to_port_0(self, profileid, twid, flow): """ From c66ea4cdd0ed9e324217b1df715ecd35262006ad Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:48:21 +0300 Subject: [PATCH 02/44] flowalerts.dns: use async timer instead of timerthread to avoid hanging threads in memory --- modules/flowalerts/conn.py | 9 +- modules/flowalerts/dns.py | 98 ++++++++----------- slips_files/core/database/database_manager.py | 2 +- 3 files changed, 46 insertions(+), 63 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index 109e5a8cc..ca8a0c91c 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -418,7 +418,7 @@ def check_if_resolution_was_made_by_different_version( async def check_connection_without_dns_resolution( self, profileid, twid, flow - ): + ) -> bool: """ Checks if there's a flow to a dstip that has no cached DNS answer """ @@ -427,7 +427,7 @@ async def check_connection_without_dns_resolution( # 2- Ignore some IPs like private IPs, multicast, and broadcast if self.should_ignore_conn_without_dns(flow): - return + return False # Ignore some IP ## - All dhcp servers. 
Since is ok to connect to @@ -486,10 +486,7 @@ async def check_connection_without_dns_resolution( return False self.set_evidence.conn_without_dns(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_conn_dns_timer_thread.remove(flow.uid) + return True def check_conn_to_port_0(self, profileid, twid, flow): """ diff --git a/modules/flowalerts/dns.py b/modules/flowalerts/dns.py index 0c46e5a5f..0027ddf31 100644 --- a/modules/flowalerts/dns.py +++ b/modules/flowalerts/dns.py @@ -1,11 +1,10 @@ +import asyncio import collections -import contextlib import json import math from typing import List import validators -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -157,20 +156,8 @@ def is_connection_made_by_different_version(self, profileid, twid, daddr): # by this computer but using a different ip version return True - def check_dns_without_connection(self, profileid, twid, flow): - """ - Makes sure all cached DNS answers are used in contacted_ips - """ - if not self.should_detect_dns_without_conn(flow): - return False - - # One DNS query may not be answered exactly by UID, - # but the computer can re-ask the domain, - # and the next DNS resolution can be - # answered. So dont check the UID, check if the domain has an IP - - # self.print(f'The DNS query to {domain} had as answers {answers} ') - + def get_previous_domain_resolutions(self, query) -> List[str]: + prev_resolutions = [] # It can happen that this domain was already resolved # previously, but with other IPs # So we get from the DB all the IPs for this domain @@ -180,25 +167,27 @@ def check_dns_without_connection(self, profileid, twid, flow): # with AAAA, and the computer chooses the A address. # Therefore, the 2nd DNS resolution # would be treated as 'without connection', but this is false. 
- if prev_domain_resolutions := self.db.get_domain_data(flow.query): - prev_domain_resolutions = prev_domain_resolutions.get("IPs", []) - # if there's a domain in the cache - # (prev_domain_resolutions) that is not in the - # current answers given to this function, - # append it to the answers list - flow.answers.extend( - [ - ans - for ans in prev_domain_resolutions - if ans not in flow.answers - ] - ) + if prev_domain_resolutions := self.db.get_domain_data(query): + prev_resolutions = prev_domain_resolutions.get("IPs", []) + return prev_resolutions + + def is_any_flow_answer_contacted(self, profileid, twid, flow) -> bool: + """ + checks if any of the answers of the given dns flow were contacted + before + """ + # we're doing this to answer this question, was the query we asked + # the dns for, resolved before to an IP that is not in the + # current flow.answer AND that previous resolution IP was contancted? + # if so, we extend the flow.asnwers to include + # these IPs. the goal is to avoid FPs + flow.answers.extend(self.get_previous_domain_resolutions(flow.query)) if flow.answers == ["-"]: # If no IPs are in the answer, we can not expect # the computer to connect to anything # self.print(f'No ips in the answer, so ignoring') - return False + return True contacted_ips = self.db.get_all_contacted_ips_in_profileid_twid( profileid, twid @@ -220,38 +209,35 @@ def check_dns_without_connection(self, profileid, twid, flow): ) ): # this dns resolution has a connection. 
We can exit - return False + return True # Check if there was a connection to any of the CNAMEs if self.is_cname_contacted(flow.answers, contacted_ips): # this is not a DNS without resolution + return True + + async def check_dns_without_connection( + self, profileid, twid, flow + ) -> bool: + """ + Makes sure all cached DNS answers are there in contacted_ips + """ + if not self.should_detect_dns_without_conn(flow): return False - # self.print(f'It seems that none of the IPs were contacted') - # Found a DNS query which none of its IPs was contacted - # It can be that Slips is still reading it from the files. - # Lets check back in some time - # Create a timer thread that will wait some seconds for the - # connection to arrive and then check again - if flow.uid not in self.connections_checked_in_dns_conn_timer_thread: - # comes here if we haven't started the timer - # thread for this dns before mark this dns as checked - self.connections_checked_in_dns_conn_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - # self.print(f'Starting the timer to check on {domain}, uid {uid}. - # time {datetime.datetime.now()}') - timer = TimerThread(40, self.check_dns_without_connection, params) - timer.start() - else: - # It means we already checked this dns with the Timer process - # but still no connection for it. 
- self.set_evidence.dns_without_conn(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_dns_conn_timer_thread.remove( - flow.uid - ) + if self.is_any_flow_answer_contacted(profileid, twid, flow): + return False + + # Found a DNS query and none of its answers were contacted + await asyncio.sleep(40) + + if self.is_any_flow_answer_contacted(profileid, twid, flow): + return False + + # Reaching here means we already waited some time for the connection + # of this dns to arrive but none was found + self.set_evidence.dns_without_conn(twid, flow) + return True @staticmethod def estimate_shannon_entropy(string): diff --git a/slips_files/core/database/database_manager.py b/slips_files/core/database/database_manager.py index c5f34d7f7..43cd27ef6 100644 --- a/slips_files/core/database/database_manager.py +++ b/slips_files/core/database/database_manager.py @@ -750,7 +750,7 @@ def mark_profile_tw_as_modified(self, *args, **kwargs): def add_tuple(self, *args, **kwargs): return self.rdb.add_tuple(*args, **kwargs) - def search_tws_for_flow(self, profileid, twid, uid, go_back=False): + def search_tws_for_flow(self, twid, uid, go_back=False): """ Search for the given uid in the given twid, or the tws before :param go_back: how many hours back to search? 
From 1b37445b669df51d21a73572c9f234bad5ddc70e Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:48:52 +0300 Subject: [PATCH 03/44] Imodule: unify the return type of run() --- slips_files/common/abstracts/module.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index a079c0748..ba132b2a6 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -156,7 +156,7 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) - def run(self) -> bool: + def run(self): """ This is the loop function, it runs non-stop as long as the module is running @@ -165,37 +165,38 @@ def run(self) -> bool: error: bool = self.pre_main() if error or self.should_stop(): self.shutdown_gracefully() - return True + return except KeyboardInterrupt: self.shutdown_gracefully() - return True + return except Exception: self.print_traceback() - return True + return keyboard_int_ctr = 0 while True: try: if self.should_stop(): self.shutdown_gracefully() - return True + return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately error: bool = self.main() if error: self.shutdown_gracefully() + return except KeyboardInterrupt: keyboard_int_ctr += 1 if keyboard_int_ctr >= 2: # on the second ctrl+c Slips immediately stop - return True + return - # on the first ctrl + C keep looping until the should_stop + # on the first ctrl + C keep looping until the should_stop() # returns true continue except Exception: self.print_traceback() - return False + return From 183815b0d32b32c08ffdf999fddf0ccb784d287b Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:49:34 +0300 Subject: [PATCH 04/44] flowalerts.ssh: use async timer instead of timerthread --- modules/flowalerts/ssh.py | 138 ++++++++++++++++---------------------- 1 
file changed, 58 insertions(+), 80 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index c7368433b..cadd7fdc9 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -1,7 +1,7 @@ -import contextlib +import asyncio import json +from typing import Optional -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -30,97 +30,75 @@ def read_configuration(self): conf.ssh_succesful_detection_threshold() ) - def detect_successful_ssh_by_slips(self, profileid, twid, flow): + def detect_successful_ssh_by_slips( + self, twid, conn_log_flow: dict, ssh_flow + ): """ Try Slips method to detect if SSH was successful by comparing all bytes sent and received to our threshold """ - # this is the ssh flow read from conn.log not ssh.log + size = conn_log_flow["sbytes"] + conn_log_flow["dbytes"] + if size <= self.ssh_succesful_detection_threshold: + return + + daddr = conn_log_flow["daddr"] + saddr = conn_log_flow["saddr"] + # Set the evidence because there is no + # easier way to show how Slips detected + # the successful ssh and not Zeek + self.set_evidence.ssh_successful( + twid, + saddr, + daddr, + size, + ssh_flow.uid, + ssh_flow.starttime, + by="Slips", + ) + return True + + def get_original_ssh_flow(self, flow) -> Optional[dict]: + """original ssh flows are the ones from conn.log""" original_ssh_flow = self.db.get_flow(flow.uid) original_flow_uid = next(iter(original_ssh_flow)) if original_ssh_flow[original_flow_uid]: - ssh_flow_dict = json.loads(original_ssh_flow[original_flow_uid]) - size = ssh_flow_dict["sbytes"] + ssh_flow_dict["dbytes"] - if size > self.ssh_succesful_detection_threshold: - daddr = ssh_flow_dict["daddr"] - saddr = ssh_flow_dict["saddr"] - # Set the evidence because there is no - # easier way to show how Slips detected - # the successful ssh and not Zeek - self.set_evidence.ssh_successful( - twid, - saddr, - daddr, - size, 
- flow.uid, - flow.starttime, - by="Slips", - ) - with contextlib.suppress(ValueError): - self.connections_checked_in_ssh_timer_thread.remove( - flow.uid - ) - return True - - elif flow.uid not in self.connections_checked_in_ssh_timer_thread: - # It can happen that the original SSH flow is not in the DB yet - # comes here if we haven't started the timer - # thread for this connection before - # mark this connection as checked - # self.print(f'Starting the timer to check on {flow_dict}, uid {uid}. - # time {datetime.datetime.now()}') - self.connections_checked_in_ssh_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - timer = TimerThread(15, self.check_successful_ssh, params) - timer.start() - - def detect_successful_ssh_by_zeek(self, profileid, twid, flow): - """ - Check for auth_success: true in the given zeek flow - """ - original_ssh_flow = self.db.search_tws_for_flow( - profileid, twid, flow.uid + return json.loads(original_ssh_flow[original_flow_uid]) + + def set_evidence_ssh_successful_by_zeek( + self, twid, conn_log_flow, ssh_flow + ): + daddr = conn_log_flow["daddr"] + saddr = conn_log_flow["saddr"] + size = conn_log_flow["sbytes"] + conn_log_flow["dbytes"] + self.set_evidence.ssh_successful( + twid, + saddr, + daddr, + size, + ssh_flow.uid, + ssh_flow.starttime, + by="Zeek", ) - original_flow_uid = next(iter(original_ssh_flow)) - if original_ssh_flow[original_flow_uid]: - ssh_flow_dict = json.loads(original_ssh_flow[original_flow_uid]) - daddr = ssh_flow_dict["daddr"] - saddr = ssh_flow_dict["saddr"] - size = ssh_flow_dict["sbytes"] + ssh_flow_dict["dbytes"] - self.set_evidence.ssh_successful( - twid, - saddr, - daddr, - size, - flow.uid, - flow.starttime, - by="Zeek", - ) - with contextlib.suppress(ValueError): - self.connections_checked_in_ssh_timer_thread.remove(flow.uid) - return True - - elif flow.uid not in self.connections_checked_in_ssh_timer_thread: - # It can happen that the original SSH flow is not in the DB yet - # comes here if 
we haven't started the timer thread - # for this connection before - # mark this connection as checked - # self.print(f'Starting the timer to check on {flow_dict}, - # uid {uid}. time {datetime.datetime.now()}') - self.connections_checked_in_ssh_timer_thread.append(flow.uid) - params = [flow.uid, flow.starttime, profileid, twid] - timer = TimerThread(15, self.detect_successful_ssh_by_zeek, params) - timer.start() - - def check_successful_ssh(self, profileid, twid, flow): + return True + + async def check_successful_ssh(self, twid, flow): """ Function to check if an SSH connection logged in successfully """ + # this is the ssh flow read from conn.log not ssh.log + conn_log_flow = self.get_original_ssh_flow(flow) + + if not conn_log_flow: + await asyncio.sleep(15) + conn_log_flow = self.get_original_ssh_flow(flow) + if not conn_log_flow: + return + # it's true in zeek json files, T in zeke tab files if flow.auth_success in ["true", "T"]: - self.detect_successful_ssh_by_zeek(profileid, twid, flow) + self.set_evidence_ssh_successful_by_zeek(twid, conn_log_flow, flow) else: - self.detect_successful_ssh_by_slips(profileid, twid, flow) + self.detect_successful_ssh_by_slips(twid, conn_log_flow, flow) def check_ssh_password_guessing(self, profileid, twid, flow): """ @@ -156,5 +134,5 @@ def analyze(self, msg): twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - self.check_successful_ssh(profileid, twid, flow) + self.check_successful_ssh(twid, flow) self.check_ssh_password_guessing(profileid, twid, flow) From 0c5c6b1b7a9d21317a683a9528f8051855da5f77 Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:50:06 +0300 Subject: [PATCH 05/44] update ssh unit tests --- modules/flowalerts/ssl.py | 3 ++- modules/flowalerts/timer_thread.py | 2 +- tests/test_ssh.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index bac0c2ff4..d4d6c60ba 100644 --- a/modules/flowalerts/ssl.py 
+++ b/modules/flowalerts/ssl.py @@ -42,7 +42,8 @@ def read_configuration(self): def wait_for_ssl_flows_to_appear_in_connlog(self): """ - thread that waits forever for ssl flows to appear in conn.log + thread that waits forever(as long as flowalerts is receiving new + msgs) for ssl flows to appear in conn.log whenever the conn.log flow of an ssl flow is found, thread calls check_pastebin_download ssl flows to wait for are stored in pending_ssl_flows diff --git a/modules/flowalerts/timer_thread.py b/modules/flowalerts/timer_thread.py index ed4a150e8..423ed71a3 100644 --- a/modules/flowalerts/timer_thread.py +++ b/modules/flowalerts/timer_thread.py @@ -17,7 +17,7 @@ def shutdown(self): def run(self): try: - if self._finished.isSet(): + if self._finished.is_set(): return True # sleep for interval or until shutdown diff --git a/tests/test_ssh.py b/tests/test_ssh.py index d3f238583..116734eae 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -198,7 +198,7 @@ def test_detect_successful_ssh_by_zeek(): ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) ssh.set_evidence = MagicMock() ssh.connections_checked_in_ssh_timer_thread = [] - assert ssh.detect_successful_ssh_by_zeek(profileid, twid, flow) + assert ssh.set_evidence_ssh_successful_by_zeek(twid, flow) ssh.set_evidence.ssh_successful.assert_called_once_with( twid, flow_data["saddr"], @@ -248,7 +248,7 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_success(): host_key_alg="", host_key="", ) - result = ssh.detect_successful_ssh_by_zeek("profileid", "twid", flow) + result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) expected_result = True assert result == expected_result @@ -298,7 +298,7 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): host_key_alg="", host_key="", ) - result = ssh.detect_successful_ssh_by_zeek("profileid", "twid", flow) + result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) expected_result = True assert result == expected_result From 
e52f0eed16d25e4e61437da573431693c2ecff39 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 14:48:09 +0300 Subject: [PATCH 06/44] module: add handlers for async main() and shutdown_gracefully() functions in flowalerts --- slips_files/common/abstracts/module.py | 69 ++++++++++++++++---------- 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index ba132b2a6..3cc25e248 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -1,17 +1,21 @@ +import asyncio +import inspect import sys import traceback +import warnings from abc import ABC, abstractmethod from multiprocessing import Process, Event from typing import ( Dict, Optional, ) - from slips_files.common.printer import Printer from slips_files.core.output import Output from slips_files.common.slips_utils import utils from slips_files.core.database.database_manager import DBManager +warnings.filterwarnings("ignore", category=RuntimeWarning) + class IModule(ABC, Process): """ @@ -46,27 +50,13 @@ def __init__( # set its own channels # tracks whether or not in the last iteration there was a msg # received in that channel - self.channel_tracker: Dict[str, dict] = self.init_channel_tracker() + self.channel_tracker: Dict[str, Dict[str, bool]] + self.channel_tracker = self.init_channel_tracker() def print(self, *args, **kwargs): return self.printer.print(*args, **kwargs) - @property - @abstractmethod - def name(self): - pass - - @property - @abstractmethod - def description(self): - pass - - @property - @abstractmethod - def authors(self): - pass - - def init_channel_tracker(self) -> Dict[str, bool]: + def init_channel_tracker(self) -> Dict[str, Dict[str, bool]]: """ tracks if in the last loop, a msg was received in any of the subscribed channels or not @@ -136,7 +126,7 @@ def main(self): here will be executed in a loop """ - def pre_main(self): + def pre_main(self) -> bool: """ 
This function is for initializations that are executed once before the main loop @@ -156,6 +146,34 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) + def run_shutdown_gracefully(self): + """ + some modules use async functions like flowalerts, + the goals of this function is to make sure that async and normal + shutdown_gracefully() functions run until completion + """ + if inspect.iscoroutinefunction(self.shutdown_gracefully): + loop = asyncio.get_event_loop() + # Ensure shutdown is completed + loop.run_until_complete(self.shutdown_gracefully()) + return + else: + self.shutdown_gracefully() + return + + def run_main(self): + """ + some modules use async functions like flowalerts, + the goals of this function is to make sure that async and normal + shutdown_gracefully() functions run until completion + """ + if inspect.iscoroutinefunction(self.shutdown_gracefully): + loop = asyncio.get_event_loop() + # Ensure shutdown is completed + return loop.run_until_complete(self.main()) + else: + return self.main() + def run(self): """ This is the loop function, it runs non-stop as long as @@ -164,10 +182,10 @@ def run(self): try: error: bool = self.pre_main() if error or self.should_stop(): - self.shutdown_gracefully() + self.run_shutdown_gracefully() return except KeyboardInterrupt: - self.shutdown_gracefully() + self.run_shutdown_gracefully() return except Exception: self.print_traceback() @@ -177,23 +195,20 @@ def run(self): while True: try: if self.should_stop(): - self.shutdown_gracefully() + self.run_shutdown_gracefully() return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately - error: bool = self.main() + error: bool = self.run_main() if error: - self.shutdown_gracefully() - return + self.run_shutdown_gracefully() except KeyboardInterrupt: keyboard_int_ctr += 1 - if keyboard_int_ctr >= 2: # on the second ctrl+c Slips immediately stop 
return - # on the first ctrl + C keep looping until the should_stop() # returns true continue From b755632db36d4d3d4b2c082590751cd83c4a749e Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 15:22:26 +0300 Subject: [PATCH 07/44] zeek.py: fix problem parsing FTP flows --- slips_files/core/input_profilers/zeek.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slips_files/core/input_profilers/zeek.py b/slips_files/core/input_profilers/zeek.py index 711079f41..44249cb12 100644 --- a/slips_files/core/input_profilers/zeek.py +++ b/slips_files/core/input_profilers/zeek.py @@ -162,7 +162,7 @@ def process_line(self, new_line: dict): elif "ftp" in file_type: self.flow: FTP = FTP( starttime, - line.get("uids", []), + line.get("uid", []), line.get("id.orig_h", ""), line.get("id.resp_h", ""), line.get("data_channel.resp_p", False), From aeea367d1652e7aa57de5eded037d748537b3a1d Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 16:50:55 +0300 Subject: [PATCH 08/44] http: use the original conn.log flow of teh weird.log in set_evidence_weird_http_method() --- modules/http_analyzer/http_analyzer.py | 36 +++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 3fb995689..907f6785c 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -1,9 +1,14 @@ +import asyncio import json import urllib from uuid import uuid4 import requests -from typing import Union, Dict +from typing import ( + Union, + Dict, + Optional, +) from slips_files.common.flow_classifier import FlowClassifier from slips_files.common.parsers.config_parser import ConfigParser @@ -582,21 +587,26 @@ def check_pastebin_downloads(self, twid, flow): self.db.set_evidence(evidence) return True - def set_evidence_weird_http_method(self, twid: str, flow) -> None: + def set_evidence_weird_http_method( + self, twid: str, weird_method, 
flow: dict + ) -> None: confidence = 0.9 threat_level: ThreatLevel = ThreatLevel.MEDIUM - attacker: Attacker = Attacker( - direction=Direction.SRC, attacker_type=IoCType.IP, value=flow.saddr + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=flow["saddr"], ) victim: Victim = Victim( - direction=Direction.DST, victim_type=IoCType.IP, value=flow.daddr + direction=Direction.DST, + victim_type=IoCType.IP, + value=flow["daddr"], ) description: str = ( - f'Weird HTTP method "{flow.addl}" to IP: ' - f"{flow.daddr}. by Zeek." + f"Weird HTTP method {weird_method} to IP: " + f'{flow["daddr"]}. by Zeek.' ) twid_number: int = int(twid.replace("timewindow", "")) @@ -616,7 +626,7 @@ def set_evidence_weird_http_method(self, twid: str, flow) -> None: self.db.set_evidence(evidence) - def check_weird_http_method(self, msg: Dict[str, str]): + async def check_weird_http_method(self, msg: Dict[str, str]): """ detect weird http methods in zeek's weird.log """ @@ -626,7 +636,15 @@ def check_weird_http_method(self, msg: Dict[str, str]): if "unknown_HTTP_method" not in flow.name: return False - self.set_evidence_weird_http_method(twid, flow) + conn_log_flow: Optional[dict] + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + if not conn_log_flow: + await asyncio.sleep(15) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + if not conn_log_flow: + return + + self.set_evidence_weird_http_method(twid, flow.addl, conn_log_flow) def pre_main(self): utils.drop_root_privs() From 4e7d46d9caeeaea8602a43090610cf9a5f846e86 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 16:53:19 +0300 Subject: [PATCH 09/44] module: use the main event loop to run async functions --- slips_files/common/abstracts/module.py | 94 +++++++++++++++++--------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index 3cc25e248..9e775efe0 100644 --- 
a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -1,5 +1,4 @@ import asyncio -import inspect import sys import traceback import warnings @@ -8,6 +7,7 @@ from typing import ( Dict, Optional, + Callable, ) from slips_files.common.printer import Printer from slips_files.core.output import Output @@ -71,9 +71,7 @@ def init_channel_tracker(self) -> Dict[str, Dict[str, bool]]: """ tracker = {} for channel_name in self.channels: - tracker[channel_name] = { - "msg_received": False, - } + tracker[channel_name] = {"msg_received": False} return tracker @abstractmethod @@ -146,46 +144,77 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) - def run_shutdown_gracefully(self): + def run(self): """ some modules use async functions like flowalerts, the goals of this function is to make sure that async and normal shutdown_gracefully() functions run until completion """ - if inspect.iscoroutinefunction(self.shutdown_gracefully): - loop = asyncio.get_event_loop() - # Ensure shutdown is completed - loop.run_until_complete(self.shutdown_gracefully()) - return - else: + try: + error: bool = self.pre_main() + if error or self.should_stop(): + self.shutdown_gracefully() + return + except KeyboardInterrupt: self.shutdown_gracefully() return + except Exception: + self.print_traceback() + return - def run_main(self): - """ - some modules use async functions like flowalerts, - the goals of this function is to make sure that async and normal - shutdown_gracefully() functions run until completion - """ - if inspect.iscoroutinefunction(self.shutdown_gracefully): - loop = asyncio.get_event_loop() - # Ensure shutdown is completed - return loop.run_until_complete(self.main()) - else: - return self.main() + keyboard_int_ctr = 0 + while True: + try: + if self.should_stop(): + self.shutdown_gracefully() + return + + error: bool = self.main() + if error: + 
self.shutdown_gracefully() + except KeyboardInterrupt: + keyboard_int_ctr += 1 + if keyboard_int_ctr >= 2: + return + continue + except Exception: + self.print_traceback() + return + + +class AsyncModule(IModule, ABC, Process): + """ + An abstract class for asynchronous slips modules + """ + + name = "abstract class" + + def __init__(self, *args, **kwargs): + IModule.__init__(self, *args, **kwargs) + + def init(self, **kwargs): ... + + async def main(self): ... + + async def shutdown_gracefully(self): + """Implement the async shutdown logic here""" + pass + + async def run_main(self): + return await self.main() + + def run_async_function(self, func: Callable): + loop = asyncio.get_event_loop() + return loop.run_until_complete(func()) def run(self): - """ - This is the loop function, it runs non-stop as long as - the module is running - """ try: error: bool = self.pre_main() if error or self.should_stop(): - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return except KeyboardInterrupt: - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return except Exception: self.print_traceback() @@ -195,14 +224,15 @@ def run(self): while True: try: if self.should_stop(): - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately - error: bool = self.run_main() + error: bool = self.run_async_function(self.run_main) if error: - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) + return except KeyboardInterrupt: keyboard_int_ctr += 1 From 348a622a06077921d1d2f4ef8205395322ace166 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:10:24 +0300 Subject: [PATCH 10/44] module: move asyncmodule class to a separate file --- slips_files/common/abstracts/async_module.py | 70 ++++++++++++++++++++ slips_files/common/abstracts/module.py | 67 
------------------- 2 files changed, 70 insertions(+), 67 deletions(-) create mode 100644 slips_files/common/abstracts/async_module.py diff --git a/slips_files/common/abstracts/async_module.py b/slips_files/common/abstracts/async_module.py new file mode 100644 index 000000000..92f5c7a43 --- /dev/null +++ b/slips_files/common/abstracts/async_module.py @@ -0,0 +1,70 @@ +import asyncio +from abc import ABC +from multiprocessing import Process +from typing import Callable +from slips_files.common.abstracts.module import IModule + + +class AsyncModule(IModule, ABC, Process): + """ + An abstract class for asynchronous slips modules + """ + + name = "Async Module Abstract Class" + + def __init__(self, *args, **kwargs): + IModule.__init__(self, *args, **kwargs) + + def init(self, **kwargs): ... + + async def main(self): ... + + async def shutdown_gracefully(self): + """Implement the async shutdown logic here""" + pass + + async def run_main(self): + return await self.main() + + def run_async_function(self, func: Callable): + loop = asyncio.get_event_loop() + return loop.run_until_complete(func()) + + def run(self): + try: + error: bool = self.pre_main() + if error or self.should_stop(): + self.run_async_function(self.shutdown_gracefully) + return + except KeyboardInterrupt: + self.run_async_function(self.shutdown_gracefully) + return + except Exception: + self.print_traceback() + return + + keyboard_int_ctr = 0 + while True: + try: + if self.should_stop(): + self.run_async_function(self.shutdown_gracefully) + return + + # if a module's main() returns 1, it means there's an + # error and it needs to stop immediately + error: bool = self.run_async_function(self.run_main) + if error: + self.run_async_function(self.shutdown_gracefully) + return + + except KeyboardInterrupt: + keyboard_int_ctr += 1 + if keyboard_int_ctr >= 2: + # on the second ctrl+c Slips immediately stop + return + # on the first ctrl + C keep looping until the should_stop() + # returns true + continue + 
except Exception: + self.print_traceback() + return diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index 9e775efe0..7331835f7 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -1,4 +1,3 @@ -import asyncio import sys import traceback import warnings @@ -7,7 +6,6 @@ from typing import ( Dict, Optional, - Callable, ) from slips_files.common.printer import Printer from slips_files.core.output import Output @@ -180,68 +178,3 @@ def run(self): except Exception: self.print_traceback() return - - -class AsyncModule(IModule, ABC, Process): - """ - An abstract class for asynchronous slips modules - """ - - name = "abstract class" - - def __init__(self, *args, **kwargs): - IModule.__init__(self, *args, **kwargs) - - def init(self, **kwargs): ... - - async def main(self): ... - - async def shutdown_gracefully(self): - """Implement the async shutdown logic here""" - pass - - async def run_main(self): - return await self.main() - - def run_async_function(self, func: Callable): - loop = asyncio.get_event_loop() - return loop.run_until_complete(func()) - - def run(self): - try: - error: bool = self.pre_main() - if error or self.should_stop(): - self.run_async_function(self.shutdown_gracefully) - return - except KeyboardInterrupt: - self.run_async_function(self.shutdown_gracefully) - return - except Exception: - self.print_traceback() - return - - keyboard_int_ctr = 0 - while True: - try: - if self.should_stop(): - self.run_async_function(self.shutdown_gracefully) - return - - # if a module's main() returns 1, it means there's an - # error and it needs to stop immediately - error: bool = self.run_async_function(self.run_main) - if error: - self.run_async_function(self.shutdown_gracefully) - return - - except KeyboardInterrupt: - keyboard_int_ctr += 1 - if keyboard_int_ctr >= 2: - # on the second ctrl+c Slips immediately stop - return - # on the first ctrl + C keep looping until the 
should_stop() - # returns true - continue - except Exception: - self.print_traceback() - return From 8fce1a1ce511dbb497233cc5b6d617754926052c Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:13:19 +0300 Subject: [PATCH 11/44] flowalerts: implement asyncmodule instead of IModule --- managers/process_manager.py | 4 +++- modules/flowalerts/flowalerts.py | 33 ++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/managers/process_manager.py b/managers/process_manager.py index 5a4192245..d5d475da0 100644 --- a/managers/process_manager.py +++ b/managers/process_manager.py @@ -581,7 +581,9 @@ def should_run_non_stop(self) -> bool: return True return False - def shutdown_interactive(self, to_kill_first, to_kill_last): + def shutdown_interactive( + self, to_kill_first, to_kill_last + ) -> Tuple[List[Process], List[Process]]: """ Shuts down modules in interactive mode only. it won't work with the daemon's -S because the diff --git a/modules/flowalerts/flowalerts.py b/modules/flowalerts/flowalerts.py index 6f0c57d46..ba6aa74c1 100644 --- a/modules/flowalerts/flowalerts.py +++ b/modules/flowalerts/flowalerts.py @@ -1,5 +1,10 @@ +import asyncio +import inspect +from asyncio import Task +from typing import List + from slips_files.common.slips_utils import utils -from slips_files.common.abstracts.module import IModule +from slips_files.common.abstracts.async_module import AsyncModule from .conn import Conn from .dns import DNS from .downloaded_file import DownloadedFile @@ -8,12 +13,11 @@ from .software import Software from .ssh import SSH from .ssl import SSL -from slips_files.core.helpers.whitelist.whitelist import Whitelist - from .tunnel import Tunnel +from slips_files.core.helpers.whitelist.whitelist import Whitelist -class FlowAlerts(IModule): +class FlowAlerts(AsyncModule): name = "Flow Alerts" description = ( "Alerts about flows: long connection, successful ssh, " @@ -33,6 +37,8 @@ def init(self): self.downloaded_file 
= DownloadedFile(self.db, flowalerts=self) self.tunnel = Tunnel(self.db, flowalerts=self) self.conn = Conn(self.db, flowalerts=self) + # list of async functions to await before flowalerts shuts down + self.tasks: List[Task] = [] def subscribe_to_channels(self): channels = ( @@ -51,6 +57,9 @@ def subscribe_to_channels(self): channel_obj = self.db.subscribe(channel) self.channels.update({channel: channel_obj}) + async def shutdown_gracefully(self): + await asyncio.gather(*self.tasks) + def pre_main(self): utils.drop_root_privs() self.analyzers_map = { @@ -66,11 +75,23 @@ def pre_main(self): "new_ssl": [self.ssl.analyze], } - def main(self): + async def main(self): for channel, analyzers in self.analyzers_map.items(): msg: dict = self.get_msg(channel) if not msg: continue for analyzer in analyzers: - analyzer(msg) + # some analyzers are async functions + if inspect.iscoroutinefunction(analyzer): + # analyzer will run normally, until it finishes. + # tasks inside this analyzer will run asynchrously, + # and finish whenever they finish, we'll not wait for them + loop = asyncio.get_event_loop() + task = loop.create_task(analyzer(msg)) + # to wait for these functions before flowalerts shuts down + self.tasks.append(task) + # Allow the event loop to run the scheduled task + await asyncio.sleep(0) + else: + analyzer(msg) From c3f7b7d83127dac61fcb70614fdb9d5db6e3df7f Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:14:12 +0300 Subject: [PATCH 12/44] ssl: use async instead of threads to wait for ssl conn.log flows --- modules/flowalerts/ssl.py | 97 ++++++++------------------------------- 1 file changed, 18 insertions(+), 79 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index d4d6c60ba..66b5e82df 100644 --- a/modules/flowalerts/ssl.py +++ b/modules/flowalerts/ssl.py @@ -1,11 +1,7 @@ +import asyncio import json -import multiprocessing -import threading -import time from typing import ( - Tuple, Union, - Dict, ) from 
slips_files.common.abstracts.flowalerts_analyzer import ( @@ -21,15 +17,6 @@ class SSL(IFlowalertsAnalyzer): def init(self): self.classifier = FlowClassifier() - # in pastebin download detection, we wait for each conn.log flow - # of the seen ssl flow to appear - # this is the dict of ssl flows we're waiting for - self.pending_ssl_flows = multiprocessing.Queue() - # thread that waits for ssl flows to appear in conn.log - self.ssl_thread_started = False - self.ssl_waiting_thread = threading.Thread( - target=self.wait_for_ssl_flows_to_appear_in_connlog, daemon=True - ) def name(self) -> str: return "ssl_analyzer" @@ -40,72 +27,26 @@ def read_configuration(self): conf.get_pastebin_download_threshold() ) - def wait_for_ssl_flows_to_appear_in_connlog(self): - """ - thread that waits forever(as long as flowalerts is receiving new - msgs) for ssl flows to appear in conn.log - whenever the conn.log flow of an ssl flow is found, thread calls - check_pastebin_download - ssl flows to wait for are stored in pending_ssl_flows - """ - # this is the time we give ssl flows to appear in conn.log, - # when this time is over, we check, then wait again, etc. 
- wait_time = 60 * 2 - - # this thread shouldn't run on interface only because in zeek dirs we - # we should wait for the conn.log to be read too - - while not self.flowalerts.should_stop(): - size = self.pending_ssl_flows.qsize() - if size == 0: - # nothing in queue - time.sleep(30) - continue - # try to get the conn of each pending flow only once - # this is to ensure that re-added flows to the queue aren't checked twice - for ssl_flow in range(size): - try: - ssl_flow: Tuple[Union[SSL, SuricataTLS], str, str] = ( - self.pending_ssl_flows.get(timeout=0.5) - ) - except Exception: - continue - - flow, profileid, twid = ssl_flow - # get the conn.log with the same uid, - # returns {uid: {actual flow..}} - # always returns a dict, never returns None - conn_log_flow: Dict[str, str] = self.db.get_flow(flow.uid) - if conn_log_flow := conn_log_flow.get(flow.uid): - conn_log_flow: dict = json.loads(conn_log_flow) - if "starttime" in conn_log_flow: - # this means the flow is found in conn.log - self.check_pastebin_download(flow, conn_log_flow, twid) - else: - # flow not found in conn.log yet, - # re-add it to the queue to check it later - self.pending_ssl_flows.put((flow, profileid, twid)) - - # give the ssl flows remaining in self.pending_ssl_flows - # 2 more mins to appear - time.sleep(wait_time) - - def check_pastebin_download( + async def check_pastebin_download( self, - ssl_flow: Union[SSL, SuricataTLS], - conn_log_flow: Dict[str, str], twid: str, + ssl_flow: Union[SSL, SuricataTLS], ): """ Alerts on downloads from pastebin.com with more than 12000 bytes This function waits for the ssl.log flow to appear - in conn.log before alerting - : param flow: this is the conn.log of the ssl flow - we're currently checking + in conn.log for 40s before alerting """ if "pastebin" not in ssl_flow.server_name: return False + conn_log_flow = self.utils.get_original_conn_flow(ssl_flow, self.db) + if not conn_log_flow: + await asyncio.sleep(40) + conn_log_flow = 
utils.get_original_conn_flow(ssl_flow, self.db) + if not conn_log_flow: + return False + # orig_bytes is number of payload bytes downloaded downloaded_bytes = conn_log_flow["resp_bytes"] if downloaded_bytes >= self.pastebin_downloads_threshold: @@ -203,20 +144,18 @@ def detect_doh(self, twid, flow): self.set_evidence.doh(twid, flow) self.db.set_ip_info(flow.daddr, {"is_doh_server": True}) - def analyze(self, msg: dict): - if not self.ssl_thread_started: - self.ssl_waiting_thread.start() - self.ssl_thread_started = True - + async def analyze(self, msg: dict): if utils.is_msg_intended_for(msg, "new_ssl"): msg = json.loads(msg["data"]) profileid = msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - # we'll be checking pastebin downloads of this ssl flow - # later - # todo: can i put ssl flow obj in the queue?? - self.pending_ssl_flows.put((flow, profileid, twid)) + + task = asyncio.create_task( + self.check_pastebin_download(twid, flow) + ) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_self_signed_certs(twid, flow) self.detect_malicious_ja3(twid, flow) From 5dc68978173fa2dd2d061442269d47bcbd5af195 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:15:32 +0300 Subject: [PATCH 13/44] move get_original_conn_flow() to utils --- modules/flowalerts/ssh.py | 17 +++++------------ slips_files/common/slips_utils.py | 7 +++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index cadd7fdc9..fe56372b5 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -1,6 +1,5 @@ import asyncio import json -from typing import Optional from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, @@ -57,13 +56,6 @@ def detect_successful_ssh_by_slips( ) return True - def get_original_ssh_flow(self, flow) -> Optional[dict]: - """original ssh flows are the ones from 
conn.log""" - original_ssh_flow = self.db.get_flow(flow.uid) - original_flow_uid = next(iter(original_ssh_flow)) - if original_ssh_flow[original_flow_uid]: - return json.loads(original_ssh_flow[original_flow_uid]) - def set_evidence_ssh_successful_by_zeek( self, twid, conn_log_flow, ssh_flow ): @@ -86,11 +78,11 @@ async def check_successful_ssh(self, twid, flow): Function to check if an SSH connection logged in successfully """ # this is the ssh flow read from conn.log not ssh.log - conn_log_flow = self.get_original_ssh_flow(flow) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.get_original_ssh_flow(flow) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return @@ -133,6 +125,7 @@ def analyze(self, msg): profileid = msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - - self.check_successful_ssh(twid, flow) + task = asyncio.create_task(self.check_successful_ssh(twid, flow)) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_ssh_password_guessing(profileid, twid, flow) diff --git a/slips_files/common/slips_utils.py b/slips_files/common/slips_utils.py index 4feec94fb..3e60bdaa5 100644 --- a/slips_files/common/slips_utils.py +++ b/slips_files/common/slips_utils.py @@ -114,6 +114,13 @@ def threat_level_to_string(self, threat_level: float) -> str: def is_valid_threat_level(self, threat_level): return threat_level in self.threat_levels + def get_original_conn_flow(self, altflow, db) -> Optional[dict]: + """Returns the original conn.log of the given altflow""" + original_conn_flow = db.get_flow(altflow.uid) + original_flow_uid = next(iter(original_conn_flow)) + if original_conn_flow[original_flow_uid]: + return json.loads(original_conn_flow[original_flow_uid]) + def sanitize(self, input_string): """ Sanitize strings taken from the user From 
867fd528653a4f55e2c00f94e3f1e2f44745fdc2 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:16:49 +0300 Subject: [PATCH 14/44] make analyze() asynchronous in conn.py and dns.py --- modules/flowalerts/conn.py | 11 ++++++++--- modules/flowalerts/dns.py | 10 ++++++++-- slips_files/core/database/sqlite_db/database.py | 3 ++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index ca8a0c91c..b908783e2 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -425,7 +425,6 @@ async def check_connection_without_dns_resolution( # The exceptions are: # 1- Do not check for DNS requests # 2- Ignore some IPs like private IPs, multicast, and broadcast - if self.should_ignore_conn_without_dns(flow): return False @@ -722,7 +721,7 @@ def is_dns_conn(flow): self.set_evidence.conn_to_private_ip(twid, flow) - def analyze(self, msg): + async def analyze(self, msg): if utils.is_msg_intended_for(msg, "new_flow"): msg = json.loads(msg["data"]) profileid = msg["profileid"] @@ -741,7 +740,13 @@ def analyze(self, msg): self.check_different_localnet_usage( twid, flow, what_to_check="srcip" ) - self.check_connection_without_dns_resolution(profileid, twid, flow) + task = asyncio.create_task( + self.check_connection_without_dns_resolution( + profileid, twid, flow + ) + ) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.detect_connection_to_multiple_ports(profileid, twid, flow) self.check_data_upload(profileid, twid, flow) self.check_non_http_port_80_conns(twid, flow) diff --git a/modules/flowalerts/dns.py b/modules/flowalerts/dns.py index 0027ddf31..30f889f83 100644 --- a/modules/flowalerts/dns.py +++ b/modules/flowalerts/dns.py @@ -394,14 +394,20 @@ def check_dns_arpa_scan(self, profileid, twid, flow): self.dns_arpa_queries.pop(profileid) return True - def analyze(self, msg): + async def analyze(self, msg): if not utils.is_msg_intended_for(msg, 
"new_dns"): return False msg = json.loads(msg["data"]) profileid = msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - self.check_dns_without_connection(profileid, twid, flow) + task = asyncio.create_task( + self.check_dns_without_connection(profileid, twid, flow) + ) + # Allow the event loop to run the scheduled task + await asyncio.sleep(0) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_high_entropy_dns_answers(twid, flow) self.check_invalid_dns_answers(twid, flow) self.detect_dga(profileid, twid, flow) diff --git a/slips_files/core/database/sqlite_db/database.py b/slips_files/core/database/sqlite_db/database.py index 67f40fe09..b153ea7c8 100644 --- a/slips_files/core/database/sqlite_db/database.py +++ b/slips_files/core/database/sqlite_db/database.py @@ -401,7 +401,8 @@ def execute(self, query, params=None): self.trial = 0 # discard query self.print( - f"Error executing query: {query} - {e}. Query discarded", + f"Error executing query: {query} {params}- {e}. 
" + f"Query discarded", 0, 1, ) From c82f19f4afa23bb31825a6f2943a5c452724e19c Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:30:10 +0300 Subject: [PATCH 15/44] update set_evidence_weird_http_method() unit test --- modules/http_analyzer/http_analyzer.py | 13 +++++++------ tests/test_http_analyzer.py | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 907f6785c..8164c8903 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -14,6 +14,7 @@ from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.slips_utils import utils from slips_files.common.abstracts.module import IModule +from slips_files.core.flows.zeek import Weird from slips_files.core.structures.evidence import ( Evidence, ProfileID, @@ -588,7 +589,7 @@ def check_pastebin_downloads(self, twid, flow): return True def set_evidence_weird_http_method( - self, twid: str, weird_method, flow: dict + self, twid: str, weird_flow: Weird, flow: dict ) -> None: confidence = 0.9 threat_level: ThreatLevel = ThreatLevel.MEDIUM @@ -605,7 +606,7 @@ def set_evidence_weird_http_method( ) description: str = ( - f"Weird HTTP method {weird_method} to IP: " + f"Weird HTTP method {weird_flow.addl} to IP: " f'{flow["daddr"]}. by Zeek.' 
) @@ -617,10 +618,10 @@ def set_evidence_weird_http_method( victim=victim, threat_level=threat_level, description=description, - profile=ProfileID(ip=flow.saddr), + profile=ProfileID(ip=flow["saddr"]), timewindow=TimeWindow(number=twid_number), - uid=[flow.uid], - timestamp=flow.starttime, + uid=[flow["uid"]], + timestamp=weird_flow.starttime, confidence=confidence, ) @@ -644,7 +645,7 @@ async def check_weird_http_method(self, msg: Dict[str, str]): if not conn_log_flow: return - self.set_evidence_weird_http_method(twid, flow.addl, conn_log_flow) + self.set_evidence_weird_http_method(twid, flow, conn_log_flow) def pre_main(self): utils.drop_root_privs() diff --git a/tests/test_http_analyzer.py b/tests/test_http_analyzer.py index 6ce303c80..893ec01e0 100644 --- a/tests/test_http_analyzer.py +++ b/tests/test_http_analyzer.py @@ -6,6 +6,7 @@ from slips_files.core.flows.zeek import ( HTTP, Weird, + Conn, ) from tests.module_factory import ModuleFactory from unittest.mock import patch, MagicMock @@ -325,7 +326,7 @@ def test_set_evidence_weird_http_method(mocker): "Some IP identification" ) mocker.spy(http_analyzer.db, "set_evidence") - flow = Weird( + weird_flow = Weird( starttime="1726593782.8840969", uid="123", saddr="192.168.1.5", @@ -333,7 +334,28 @@ def test_set_evidence_weird_http_method(mocker): name="", addl="weird_method_here", ) - http_analyzer.set_evidence_weird_http_method(twid, flow) + conn_flow = Conn( + starttime="1726249372.312124", + uid="123", + saddr="192.168.1.1", + daddr="1.1.1.1", + dur=1, + proto="tcp", + appproto="", + sport="0", + dport="12345", + spkts=0, + dpkts=0, + sbytes=0, + dbytes=0, + smac="", + dmac="", + state="Established", + history="", + ) + http_analyzer.set_evidence_weird_http_method( + twid, weird_flow, asdict(conn_flow) + ) http_analyzer.db.set_evidence.assert_called_once() From e7c63820ad22898f56ee483506669b8c59c64ac8 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 14:35:18 +0300 Subject: [PATCH 16/44] update http unit 
tests --- modules/http_analyzer/http_analyzer.py | 14 +++++--------- pytest.ini | 12 ++++++++++++ tests/test_http_analyzer.py | 19 ++++++++++++++----- 3 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 pytest.ini diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 8164c8903..4c8218135 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -196,9 +196,7 @@ def check_multiple_empty_connections(self, twid: str, flow): self.connections_counter[host] = ([], 0) return True - def set_evidence_incompatible_user_agent( - self, profileid, twid, flow, user_agent - ): + def set_evidence_incompatible_user_agent(self, twid, flow, user_agent): os_type: str = user_agent.get("os_type", "").lower() os_name: str = user_agent.get("os_name", "").lower() @@ -300,9 +298,7 @@ def check_incompatible_user_agent(self, profileid, twid, flow): browser = user_agent.get("browser", "").lower() # user_agent = user_agent.get('user_agent', '') if "safari" in browser and "apple" not in vendor: - self.set_evidence_incompatible_user_agent( - profileid, twid, flow, user_agent - ) + self.set_evidence_incompatible_user_agent(twid, flow, user_agent) return True # make sure all of them are lowercase @@ -344,7 +340,7 @@ def check_incompatible_user_agent(self, profileid, twid, flow): # [('microsoft', 'windows', 'NT'), ('android'), ('linux')] # is found in the UA that belongs to an apple device self.set_evidence_incompatible_user_agent( - profileid, twid, flow, user_agent + twid, flow, user_agent ) return True @@ -638,10 +634,10 @@ async def check_weird_http_method(self, msg: Dict[str, str]): return False conn_log_flow: Optional[dict] - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = 
utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..64a304dae --- /dev/null +++ b/pytest.ini @@ -0,0 +1,12 @@ +[pytest] +log_level = INFO +log_cli_level = INFO +log_format = %(asctime)s %(levelname)s %(message)s +log_cli_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %H:%M:%S +log_cli_date_format = %H:%M:%S +addopts = -s -vvv -p no:warnings +# ensures that the appropriate event loop scope is selected automatically +# based on the version of pytest-asyncio you're using. +asyncio_mode = auto +asyncio_default_fixture_loop_scope = function diff --git a/tests/test_http_analyzer.py b/tests/test_http_analyzer.py index 893ec01e0..3071dd111 100644 --- a/tests/test_http_analyzer.py +++ b/tests/test_http_analyzer.py @@ -2,6 +2,7 @@ import json from dataclasses import asdict +import pytest from slips_files.core.flows.zeek import ( HTTP, @@ -9,9 +10,12 @@ Conn, ) from tests.module_factory import ModuleFactory -from unittest.mock import patch, MagicMock +from unittest.mock import ( + patch, + MagicMock, + Mock, +) from modules.http_analyzer.http_analyzer import utils -import pytest import requests # dummy params used for testing @@ -412,8 +416,9 @@ def test_read_configuration_valid(mocker, config_value): ), ], ) -def test_check_weird_http_method(mocker, flow_name, evidence_expected): +async def test_check_weird_http_method(mocker, flow_name, evidence_expected): http_analyzer = ModuleFactory().create_http_analyzer_obj() + http_analyzer.set_evidence_weird_http_method = Mock() mocker.spy(http_analyzer, "set_evidence_weird_http_method") msg = { @@ -424,13 +429,17 @@ def test_check_weird_http_method(mocker, flow_name, evidence_expected): saddr="192.168.1.5", daddr="1.1.1.1", name=flow_name, - addl="weird_method_here", + addl=flow_name, ) ), "twid": twid, } - http_analyzer.check_weird_http_method(msg) + with patch( + 
"slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, {"flow": {}}] + await http_analyzer.check_weird_http_method(msg) if evidence_expected: http_analyzer.set_evidence_weird_http_method.assert_called_once() From 07dfcd3eeaaa6186b59e049cc7de32ebc068216c Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:08:22 +0300 Subject: [PATCH 17/44] update ssl unit tests --- modules/flowalerts/ssl.py | 2 +- tests/test_ssl.py | 45 +++++++++++++++------------------------ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index 66b5e82df..2e10937fe 100644 --- a/modules/flowalerts/ssl.py +++ b/modules/flowalerts/ssl.py @@ -40,7 +40,7 @@ async def check_pastebin_download( if "pastebin" not in ssl_flow.server_name: return False - conn_log_flow = self.utils.get_original_conn_flow(ssl_flow, self.db) + conn_log_flow = utils.get_original_conn_flow(ssl_flow, self.db) if not conn_log_flow: await asyncio.sleep(40) conn_log_flow = utils.get_original_conn_flow(ssl_flow, self.db) diff --git a/tests/test_ssl.py b/tests/test_ssl.py index 5365ef3bf..e9c1c2196 100644 --- a/tests/test_ssl.py +++ b/tests/test_ssl.py @@ -1,7 +1,10 @@ """Unit test for modules/flowalerts/ssl.py""" from dataclasses import asdict -from unittest.mock import Mock +from unittest.mock import ( + Mock, + patch, +) from slips_files.core.flows.zeek import ( SSL, @@ -189,7 +192,7 @@ def test_detect_doh(mocker, is_doh, expected_calls): ("www.example.com", 15000, False), ], ) -def test_check_pastebin_download( +async def test_check_pastebin_download( mocker, server_name, downloaded_bytes, @@ -201,7 +204,6 @@ def test_check_pastebin_download( "modules.flowalerts.set_evidence.SetEvidnceHelper.pastebin_download" ) - conn_log_flow = {"resp_bytes": downloaded_bytes} flow = SSL( starttime="1726593782.8840969", uid="123", @@ -224,7 +226,12 @@ def 
test_check_pastebin_download( ja3s="", is_DoH="", ) - ssl.check_pastebin_download(flow, conn_log_flow, twid) + conn_log_flow = {"resp_bytes": downloaded_bytes} + with patch( + "slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, conn_log_flow] + await ssl.check_pastebin_download(twid, flow) assert mock_set_evidence.call_count == expected_call_count @@ -353,11 +360,8 @@ def test_check_non_ssl_port_443_conns( assert mock_set_evidence.call_count == expected_call_count -def test_analyze_new_ssl_msg(mocker): +async def test_analyze_new_ssl_msg(mocker): ssl = ModuleFactory().create_ssl_analyzer_obj() - mock_pending_ssl_flows_put = mocker.patch.object( - ssl.pending_ssl_flows, "put" - ) mock_check_self_signed_certs = mocker.patch.object( ssl, "check_self_signed_certs" ) @@ -403,20 +407,9 @@ def test_analyze_new_ssl_msg(mocker): ), } - ssl.analyze(msg) - - mock_pending_ssl_flows_put.assert_called_once_with( - ( - flow, - "profile_192.168.1.1", - "timewindow1", - ) - ) - + await ssl.analyze(msg) mock_check_self_signed_certs.assert_called_once_with("timewindow1", flow) - mock_detect_malicious_ja3.assert_called_once_with("timewindow1", flow) - mock_detect_incompatible_cn.assert_called_once_with( "profile_192.168.1.1", "timewindow1", flow ) @@ -424,7 +417,7 @@ def test_analyze_new_ssl_msg(mocker): mock_detect_doh.assert_called_once_with("timewindow1", flow) -def test_analyze_new_flow_msg(mocker): +async def test_analyze_new_flow_msg(mocker): ssl = ModuleFactory().create_ssl_analyzer_obj() mock_check_non_ssl_port_443_conns = mocker.patch.object( ssl, "check_non_ssl_port_443_conns" @@ -460,21 +453,18 @@ def test_analyze_new_flow_msg(mocker): ), } - ssl.analyze(msg) + await ssl.analyze(msg) mock_check_non_ssl_port_443_conns.assert_called_once_with( "timewindow1", flow ) -def test_analyze_no_messages( +async def test_analyze_no_messages( mocker, ): ssl = 
ModuleFactory().create_ssl_analyzer_obj() - mock_pending_ssl_flows_put = mocker.patch.object( - ssl.pending_ssl_flows, "put" - ) mock_check_self_signed_certs = mocker.patch.object( ssl, "check_self_signed_certs" ) @@ -489,9 +479,8 @@ def test_analyze_no_messages( ssl, "check_non_ssl_port_443_conns" ) - ssl.analyze({}) + await ssl.analyze({}) - mock_pending_ssl_flows_put.assert_not_called() mock_check_self_signed_certs.assert_not_called() mock_detect_malicious_ja3.assert_not_called() mock_detect_incompatible_cn.assert_not_called() From d6a38aa501fb4973c4f1c9f4e1dcaed649d1091f Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:17:28 +0300 Subject: [PATCH 18/44] update ssh unit tests --- modules/flowalerts/ssh.py | 6 +++--- tests/module_factory.py | 9 +-------- tests/test_ssh.py | 40 ++++++++++++++++++++++++--------------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index fe56372b5..7160e2b34 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -78,11 +78,11 @@ async def check_successful_ssh(self, twid, flow): Function to check if an SSH connection logged in successfully """ # this is the ssh flow read from conn.log not ssh.log - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return @@ -117,7 +117,7 @@ def check_ssh_password_guessing(self, profileid, twid, flow): # reset the counter del self.password_guessing_cache[cache_key] - def analyze(self, msg): + async def analyze(self, msg): if not utils.is_msg_intended_for(msg, "new_ssh"): return diff --git a/tests/module_factory.py b/tests/module_factory.py index 903403b71..704fe32a5 100644 --- a/tests/module_factory.py +++ b/tests/module_factory.py 
@@ -217,13 +217,7 @@ def create_smtp_analyzer_obj(self, mock_db): @patch(DB_MANAGER, name="mock_db") def create_ssl_analyzer_obj(self, mock_db): flowalerts = self.create_flowalerts_obj() - with patch( - "modules.flowalerts.ssl.SSL" - ".wait_for_ssl_flows_to_appear_in_connlog", - return_value=Mock(), - ): - ssl = SSL(flowalerts.db, flowalerts=flowalerts) - return ssl + return SSL(flowalerts.db, flowalerts=flowalerts) @patch(DB_MANAGER, name="mock_db") def create_ssh_analyzer_obj(self, mock_db): @@ -608,4 +602,3 @@ def create_riskiq_obj(self, mock_db): termination_event, ) return riskiq - diff --git a/tests/test_ssh.py b/tests/test_ssh.py index 116734eae..0197f3ca4 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -5,9 +5,12 @@ from slips_files.core.flows.zeek import SSH from tests.module_factory import ModuleFactory import json -from unittest.mock import patch +from unittest.mock import ( + patch, +) from unittest.mock import MagicMock import pytest +from tests.common_test_utils import get_mock_coro # dummy params used for testing profileid = "profile_192.168.1.1" @@ -18,7 +21,7 @@ @pytest.mark.parametrize( - "auth_success, expected_called_zeek, expected_called_slips", + "auth_success, expected_zeek_evidence, expected_called_slips", [ # Test case 1: auth_success is 'true' - # should call detect_successful_ssh_by_zeek @@ -37,12 +40,12 @@ ("some_other_value", False, True), ], ) -def test_check_successful_ssh( - mocker, auth_success, expected_called_zeek, expected_called_slips +async def test_check_successful_ssh( + mocker, auth_success, expected_zeek_evidence, expected_called_slips ): ssh = ModuleFactory().create_ssh_analyzer_obj() - mock_detect_zeek = mocker.patch( - "modules.flowalerts.ssh.SSH.detect_successful_ssh_by_zeek" + mock_set_evidence_ssh_successful_by_zeek = mocker.patch( + "modules.flowalerts.ssh.SSH.set_evidence_ssh_successful_by_zeek" ) mock_detect_slips = mocker.patch( "modules.flowalerts.ssh.SSH.detect_successful_ssh_by_slips" @@ -64,9 +67,16 
@@ def test_check_successful_ssh( host_key_alg="", host_key="", ) - ssh.check_successful_ssh(profileid, twid, flow) - - assert mock_detect_zeek.called == expected_called_zeek + with patch( + "slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, {"flow": {}}] + await ssh.check_successful_ssh(twid, flow) + + assert ( + mock_set_evidence_ssh_successful_by_zeek.called + == expected_zeek_evidence + ) assert mock_detect_slips.called == expected_called_slips @@ -314,23 +324,23 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread -def test_analyze_no_message(): +async def test_analyze_no_message(): ssh = ModuleFactory().create_ssh_analyzer_obj() ssh.flowalerts = MagicMock() ssh.flowalerts.get_msg.return_value = None ssh.check_successful_ssh = MagicMock() ssh.check_ssh_password_guessing = MagicMock() - ssh.analyze({}) + await ssh.analyze({}) ssh.check_successful_ssh.assert_not_called() ssh.check_ssh_password_guessing.assert_not_called() @pytest.mark.parametrize("auth_success", ["true", "false"]) -def test_analyze_with_message(auth_success): +async def test_analyze_with_message(auth_success): ssh = ModuleFactory().create_ssh_analyzer_obj() - ssh.check_successful_ssh = MagicMock() + ssh.check_successful_ssh = get_mock_coro(True) ssh.check_ssh_password_guessing = MagicMock() flow = SSH( starttime="1726655400.0", @@ -356,9 +366,9 @@ def test_analyze_with_message(auth_success): "flow": asdict(flow), } - ssh.analyze({"channel": "new_ssh", "data": json.dumps(msg_data)}) + await ssh.analyze({"channel": "new_ssh", "data": json.dumps(msg_data)}) - ssh.check_successful_ssh.assert_called_once_with(profileid, twid, flow) + ssh.check_successful_ssh.assert_called_once_with(twid, flow) ssh.check_ssh_password_guessing.assert_called_once_with( profileid, twid, flow ) From 9ffd7f74e667b915f8e4134d9c1a66e2b9e7623f Mon 
Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:18:28 +0300 Subject: [PATCH 19/44] update input.py unit tests --- tests/test_inputProc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_inputProc.py b/tests/test_inputProc.py index 91c33109e..15182ff12 100644 --- a/tests/test_inputProc.py +++ b/tests/test_inputProc.py @@ -403,13 +403,11 @@ def test_shutdown_gracefully_all_components_active(): input_process.zeek_thread = MagicMock() input_process.zeek_thread.start() input_process.open_file_handlers = {"test_file.log": MagicMock()} - input_process.zeek_pid = os.getpid() + input_process.zeek_pid = 123 with patch("os.kill") as mock_kill: - assert input_process.shutdown_gracefully() is True - mock_kill.assert_called_once_with( - input_process.zeek_pid, signal.SIGKILL - ) + assert input_process.shutdown_gracefully() + mock_kill.assert_called_with(input_process.zeek_pid, signal.SIGKILL) assert input_process.open_file_handlers["test_file.log"].close.called From 8761b44d7523f5adfdfd8538a5940f4eb487c543 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:19:35 +0300 Subject: [PATCH 20/44] common_test_utils.py: add a mock to use for async functions --- tests/common_test_utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/common_test_utils.py b/tests/common_test_utils.py index 50b87e0d6..08ba1df9b 100644 --- a/tests/common_test_utils.py +++ b/tests/common_test_utils.py @@ -8,6 +8,7 @@ Dict, Optional, ) +from unittest.mock import Mock IS_IN_A_DOCKER_CONTAINER = os.environ.get("IS_IN_A_DOCKER_CONTAINER", False) @@ -20,6 +21,19 @@ path.mkdir(parents=True, exist_ok=True) +def get_mock_coro(return_value): + """ + instead of doing async_func = Mock() which doesn't work + you should use this function to mock it + so async_func = get_mock_coro(x) + """ + + async def mock_coro(*args, **kwargs): + return return_value + + return Mock(wraps=mock_coro) + + def do_nothing(*args): """Used to override the print 
function because using the self.print causes broken pipes""" pass From d3881b3ad1414088d31f25952fda0fde95108a8b Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:23:59 +0300 Subject: [PATCH 21/44] update dns.py unit tests --- tests/test_dns.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index 4e5464e74..386cb3bbb 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -3,6 +3,7 @@ from dataclasses import asdict from slips_files.core.flows.zeek import DNS +from tests.common_test_utils import get_mock_coro from tests.module_factory import ModuleFactory from numpy import arange from unittest.mock import patch, Mock @@ -439,17 +440,17 @@ def test_check_high_entropy_dns_answers_no_call( ), ], ) -def test_analyze_new_flow_msg(test_case, expected_calls): +async def test_analyze_new_flow_msg(test_case, expected_calls): dns = ModuleFactory().create_dns_analyzer_obj() dns.connections_checked_in_dns_conn_timer_thread = [] - dns.check_dns_without_connection = Mock() + dns.check_dns_without_connection = get_mock_coro(True) dns.check_high_entropy_dns_answers = Mock() dns.check_invalid_dns_answers = Mock() dns.detect_dga = Mock() dns.detect_young_domains = Mock() dns.check_dns_arpa_scan = Mock() - dns.analyze({"channel": "new_dns", "data": test_case["data"]}) + await dns.analyze({"channel": "new_dns", "data": test_case["data"]}) assert ( dns.check_dns_without_connection.call_count From 73cf821499d58d3941ebecb2584e925685cb0c45 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:53:50 +0300 Subject: [PATCH 22/44] update ssh.py unit tests --- tests/test_ssh.py | 158 ++-------------------------------------------- 1 file changed, 7 insertions(+), 151 deletions(-) diff --git a/tests/test_ssh.py b/tests/test_ssh.py index 0197f3ca4..f80e91ea4 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -162,9 +162,13 @@ def test_detect_successful_ssh_by_slips(): host_key_alg="", host_key="", ) - result 
= ssh.detect_successful_ssh_by_slips("profileid", "twid", flow) - expected_result = True - assert result == expected_result + conn_log_flow = { + "saddr": flow.saddr, + "daddr": flow.daddr, + "sbytes": 2000, + "dbytes": 2000, + } + assert ssh.detect_successful_ssh_by_slips("twid", conn_log_flow, flow) ssh.set_evidence.ssh_successful.assert_called_once_with( "twid", "192.168.1.1", @@ -174,154 +178,6 @@ def test_detect_successful_ssh_by_slips(): flow.starttime, by="Slips", ) - assert "1234" not in ssh.connections_checked_in_ssh_timer_thread - - -def test_detect_successful_ssh_by_zeek(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - profileid = "profile_192.168.1.1" - twid = "timewindow1" - flow = SSH( - starttime="1726655400.0", - uid="1234", - daddr="192.168.1.2", - saddr="192.168.1.1", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - flow_data = { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - } - mock_flow = {"1234": json.dumps(flow_data)} - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - ssh.connections_checked_in_ssh_timer_thread = [] - assert ssh.set_evidence_ssh_successful_by_zeek(twid, flow) - ssh.set_evidence.ssh_successful.assert_called_once_with( - twid, - flow_data["saddr"], - flow_data["daddr"], - flow_data["sbytes"] + flow_data["dbytes"], - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread - ssh.db.search_tws_for_flow.assert_called_once_with( - profileid, twid, flow.uid - ) - - -def test_detect_successful_ssh_by_zeek_flow_exists_auth_success(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - - mock_flow = { - "test_uid": json.dumps( - { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - "auth_success": True, - } 
- ) - } - - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - flow = SSH( - starttime="1726655400.0", - uid="1234", - saddr="192.168.1.1", - daddr="192.168.1.2", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) - - expected_result = True - assert result == expected_result - ssh.set_evidence.ssh_successful.assert_called_once_with( - "twid", - "192.168.1.1", - "192.168.1.2", - 2000, - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread - - -def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - - mock_flow = { - "test_uid": json.dumps( - { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - "auth_success": False, - } - ) - } - - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - flow = SSH( - starttime="1726655400.0", - uid="1234", - saddr="192.168.1.1", - daddr="192.168.1.2", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) - - expected_result = True - assert result == expected_result - ssh.set_evidence.ssh_successful.assert_called_once_with( - "twid", - "192.168.1.1", - "192.168.1.2", - 2000, - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread async def test_analyze_no_message(): From c4c62ecf9a42b987a714fed89a08070f7be4525d Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:01:16 +0300 Subject: [PATCH 23/44] flowalerts.conn: use async 
timer instead of timerthread to avoid hanging threads in memory --- modules/flowalerts/conn.py | 71 +++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 39 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index e8185ef8d..109e5a8cc 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -1,3 +1,4 @@ +import asyncio import contextlib import ipaddress import json @@ -6,7 +7,6 @@ import validators from modules.flowalerts.dns import DNS -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -416,7 +416,9 @@ def check_if_resolution_was_made_by_different_version( pass return False - def check_connection_without_dns_resolution(self, profileid, twid, flow): + async def check_connection_without_dns_resolution( + self, profileid, twid, flow + ): """ Checks if there's a flow to a dstip that has no cached DNS answer """ @@ -458,45 +460,36 @@ def check_connection_without_dns_resolution(self, profileid, twid, flow): if self.db.is_ip_resolved(flow.daddr, 24): return False - if flow.uid not in self.connections_checked_in_conn_dns_timer_thread: - # comes here if we haven't started the timer - # thread for this connection before - # mark this connection as checked - self.connections_checked_in_conn_dns_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - # There is no DNS resolution, but it can be that Slips is - # still reading it from the files. - # To give time to Slips to read all the files and get all the flows - # don't alert a Connection Without DNS until 5 seconds has passed - # in real time from the time of this checking. 
- timer = TimerThread( - 15, self.check_connection_without_dns_resolution, params - ) - timer.start() - else: - # It means we already checked this conn with the Timer process - # (we waited 15 seconds for the dns to arrive after - # the connection was made) - # but still no dns resolution for it. - # Sometimes the same computer makes requests using - # its ipv4 and ipv6 address, check if this is the case - if self.check_if_resolution_was_made_by_different_version( - profileid, flow.daddr - ): - return False + # There is no DNS resolution, but it can be that Slips is + # still reading it from the files. + # To give time to Slips to read all the files and get all the flows + # don't alert a Connection Without DNS until 15 seconds has passed + # in real time from the time of this checking. + await asyncio.sleep(15) + if self.db.is_ip_resolved(flow.daddr, 24): + return False - if self.is_well_known_org(flow.daddr): - # if the SNI or rDNS of the IP matches a - # well-known org, then this is a FP - return False + # Reaching here means we already waited 15 seconds for the dns + # to arrive after the connection was made, but still no dns + # resolution for it. 
- self.set_evidence.conn_without_dns(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_conn_dns_timer_thread.remove( - flow.uid - ) + # Sometimes the same computer makes requests using + # its ipv4 and ipv6 address, check if this is the case + if self.check_if_resolution_was_made_by_different_version( + profileid, flow.daddr + ): + return False + + if self.is_well_known_org(flow.daddr): + # if the SNI or rDNS of the IP matches a + # well-known org, then this is a FP + return False + + self.set_evidence.conn_without_dns(twid, flow) + # This UID will never appear again, so we can remove it and + # free some memory + with contextlib.suppress(ValueError): + self.connections_checked_in_conn_dns_timer_thread.remove(flow.uid) def check_conn_to_port_0(self, profileid, twid, flow): """ From cb6b122ced83f65098e18dc85d053be21d297cf4 Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:48:21 +0300 Subject: [PATCH 24/44] flowalerts.dns: use async timer instead of timerthread to avoid hanging threads in memory --- modules/flowalerts/conn.py | 9 +- modules/flowalerts/dns.py | 98 ++++++++----------- slips_files/core/database/database_manager.py | 2 +- 3 files changed, 46 insertions(+), 63 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index 109e5a8cc..ca8a0c91c 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -418,7 +418,7 @@ def check_if_resolution_was_made_by_different_version( async def check_connection_without_dns_resolution( self, profileid, twid, flow - ): + ) -> bool: """ Checks if there's a flow to a dstip that has no cached DNS answer """ @@ -427,7 +427,7 @@ async def check_connection_without_dns_resolution( # 2- Ignore some IPs like private IPs, multicast, and broadcast if self.should_ignore_conn_without_dns(flow): - return + return False # Ignore some IP ## - All dhcp servers. 
Since is ok to connect to @@ -486,10 +486,7 @@ async def check_connection_without_dns_resolution( return False self.set_evidence.conn_without_dns(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_conn_dns_timer_thread.remove(flow.uid) + return True def check_conn_to_port_0(self, profileid, twid, flow): """ diff --git a/modules/flowalerts/dns.py b/modules/flowalerts/dns.py index 0c46e5a5f..0027ddf31 100644 --- a/modules/flowalerts/dns.py +++ b/modules/flowalerts/dns.py @@ -1,11 +1,10 @@ +import asyncio import collections -import contextlib import json import math from typing import List import validators -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -157,20 +156,8 @@ def is_connection_made_by_different_version(self, profileid, twid, daddr): # by this computer but using a different ip version return True - def check_dns_without_connection(self, profileid, twid, flow): - """ - Makes sure all cached DNS answers are used in contacted_ips - """ - if not self.should_detect_dns_without_conn(flow): - return False - - # One DNS query may not be answered exactly by UID, - # but the computer can re-ask the domain, - # and the next DNS resolution can be - # answered. So dont check the UID, check if the domain has an IP - - # self.print(f'The DNS query to {domain} had as answers {answers} ') - + def get_previous_domain_resolutions(self, query) -> List[str]: + prev_resolutions = [] # It can happen that this domain was already resolved # previously, but with other IPs # So we get from the DB all the IPs for this domain @@ -180,25 +167,27 @@ def check_dns_without_connection(self, profileid, twid, flow): # with AAAA, and the computer chooses the A address. # Therefore, the 2nd DNS resolution # would be treated as 'without connection', but this is false. 
- if prev_domain_resolutions := self.db.get_domain_data(flow.query): - prev_domain_resolutions = prev_domain_resolutions.get("IPs", []) - # if there's a domain in the cache - # (prev_domain_resolutions) that is not in the - # current answers given to this function, - # append it to the answers list - flow.answers.extend( - [ - ans - for ans in prev_domain_resolutions - if ans not in flow.answers - ] - ) + if prev_domain_resolutions := self.db.get_domain_data(query): + prev_resolutions = prev_domain_resolutions.get("IPs", []) + return prev_resolutions + + def is_any_flow_answer_contacted(self, profileid, twid, flow) -> bool: + """ + checks if any of the answers of the given dns flow were contacted + before + """ + # we're doing this to answer this question, was the query we asked + # the dns for, resolved before to an IP that is not in the + # current flow.answer AND that previous resolution IP was contacted? + # if so, we extend the flow.answers to include + # these IPs. the goal is to avoid FPs + flow.answers.extend(self.get_previous_domain_resolutions(flow.query)) if flow.answers == ["-"]: # If no IPs are in the answer, we can not expect # the computer to connect to anything # self.print(f'No ips in the answer, so ignoring') - return False + return True contacted_ips = self.db.get_all_contacted_ips_in_profileid_twid( profileid, twid ) @@ -220,38 +209,35 @@ def check_dns_without_connection(self, profileid, twid, flow): ) ): # this dns resolution has a connection.
We can exit - return False + return True # Check if there was a connection to any of the CNAMEs if self.is_cname_contacted(flow.answers, contacted_ips): # this is not a DNS without resolution + return True + + async def check_dns_without_connection( + self, profileid, twid, flow + ) -> bool: + """ + Makes sure all cached DNS answers are there in contacted_ips + """ + if not self.should_detect_dns_without_conn(flow): return False - # self.print(f'It seems that none of the IPs were contacted') - # Found a DNS query which none of its IPs was contacted - # It can be that Slips is still reading it from the files. - # Lets check back in some time - # Create a timer thread that will wait some seconds for the - # connection to arrive and then check again - if flow.uid not in self.connections_checked_in_dns_conn_timer_thread: - # comes here if we haven't started the timer - # thread for this dns before mark this dns as checked - self.connections_checked_in_dns_conn_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - # self.print(f'Starting the timer to check on {domain}, uid {uid}. - # time {datetime.datetime.now()}') - timer = TimerThread(40, self.check_dns_without_connection, params) - timer.start() - else: - # It means we already checked this dns with the Timer process - # but still no connection for it. 
- self.set_evidence.dns_without_conn(twid, flow) - # This UID will never appear again, so we can remove it and - # free some memory - with contextlib.suppress(ValueError): - self.connections_checked_in_dns_conn_timer_thread.remove( - flow.uid - ) + if self.is_any_flow_answer_contacted(profileid, twid, flow): + return False + + # Found a DNS query and none of its answers were contacted + await asyncio.sleep(40) + + if self.is_any_flow_answer_contacted(profileid, twid, flow): + return False + + # Reaching here means we already waited some time for the connection + # of this dns to arrive but none was found + self.set_evidence.dns_without_conn(twid, flow) + return True @staticmethod def estimate_shannon_entropy(string): diff --git a/slips_files/core/database/database_manager.py b/slips_files/core/database/database_manager.py index c5f34d7f7..43cd27ef6 100644 --- a/slips_files/core/database/database_manager.py +++ b/slips_files/core/database/database_manager.py @@ -750,7 +750,7 @@ def mark_profile_tw_as_modified(self, *args, **kwargs): def add_tuple(self, *args, **kwargs): return self.rdb.add_tuple(*args, **kwargs) - def search_tws_for_flow(self, profileid, twid, uid, go_back=False): + def search_tws_for_flow(self, twid, uid, go_back=False): """ Search for the given uid in the given twid, or the tws before :param go_back: how many hours back to search? 
From cf46d40c74bc641202fd2e75a066bf1d039bf59a Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:48:52 +0300 Subject: [PATCH 25/44] Imodule: unify the return type of run() --- slips_files/common/abstracts/module.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index 6dfad5500..fcdec3dbe 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -157,7 +157,7 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) - def run(self) -> bool: + def run(self): """ This is the loop function, it runs non-stop as long as the module is running @@ -166,25 +166,26 @@ def run(self) -> bool: error: bool = self.pre_main() if error or self.should_stop(): self.shutdown_gracefully() - return True + return except KeyboardInterrupt: self.shutdown_gracefully() - return True + return except Exception: self.print_traceback() - return True + return while True: try: if self.should_stop(): self.shutdown_gracefully() - return True + return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately error: bool = self.main() if error: self.shutdown_gracefully() + return except KeyboardInterrupt: self.keyboard_int_ctr += 1 @@ -193,9 +194,9 @@ def run(self) -> bool: # on the second ctrl+c the module immediately stops return True - # on the first ctrl + C keep looping until the should_stop + # on the first ctrl + C keep looping until the should_stop() # returns true continue except Exception: self.print_traceback() - return False + return From 4f8a1bfff84538da796a387ce469f95f863faed6 Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:49:34 +0300 Subject: [PATCH 26/44] flowalerts.ssh: use async timer instead of timerthread --- modules/flowalerts/ssh.py | 138 ++++++++++++++++---------------------- 1 file 
changed, 58 insertions(+), 80 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index c7368433b..cadd7fdc9 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -1,7 +1,7 @@ -import contextlib +import asyncio import json +from typing import Optional -from modules.flowalerts.timer_thread import TimerThread from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, ) @@ -30,97 +30,75 @@ def read_configuration(self): conf.ssh_succesful_detection_threshold() ) - def detect_successful_ssh_by_slips(self, profileid, twid, flow): + def detect_successful_ssh_by_slips( + self, twid, conn_log_flow: dict, ssh_flow + ): """ Try Slips method to detect if SSH was successful by comparing all bytes sent and received to our threshold """ - # this is the ssh flow read from conn.log not ssh.log + size = conn_log_flow["sbytes"] + conn_log_flow["dbytes"] + if size <= self.ssh_succesful_detection_threshold: + return + + daddr = conn_log_flow["daddr"] + saddr = conn_log_flow["saddr"] + # Set the evidence because there is no + # easier way to show how Slips detected + # the successful ssh and not Zeek + self.set_evidence.ssh_successful( + twid, + saddr, + daddr, + size, + ssh_flow.uid, + ssh_flow.starttime, + by="Slips", + ) + return True + + def get_original_ssh_flow(self, flow) -> Optional[dict]: + """original ssh flows are the ones from conn.log""" original_ssh_flow = self.db.get_flow(flow.uid) original_flow_uid = next(iter(original_ssh_flow)) if original_ssh_flow[original_flow_uid]: - ssh_flow_dict = json.loads(original_ssh_flow[original_flow_uid]) - size = ssh_flow_dict["sbytes"] + ssh_flow_dict["dbytes"] - if size > self.ssh_succesful_detection_threshold: - daddr = ssh_flow_dict["daddr"] - saddr = ssh_flow_dict["saddr"] - # Set the evidence because there is no - # easier way to show how Slips detected - # the successful ssh and not Zeek - self.set_evidence.ssh_successful( - twid, - saddr, - daddr, - size, - 
flow.uid, - flow.starttime, - by="Slips", - ) - with contextlib.suppress(ValueError): - self.connections_checked_in_ssh_timer_thread.remove( - flow.uid - ) - return True - - elif flow.uid not in self.connections_checked_in_ssh_timer_thread: - # It can happen that the original SSH flow is not in the DB yet - # comes here if we haven't started the timer - # thread for this connection before - # mark this connection as checked - # self.print(f'Starting the timer to check on {flow_dict}, uid {uid}. - # time {datetime.datetime.now()}') - self.connections_checked_in_ssh_timer_thread.append(flow.uid) - params = [profileid, twid, flow] - timer = TimerThread(15, self.check_successful_ssh, params) - timer.start() - - def detect_successful_ssh_by_zeek(self, profileid, twid, flow): - """ - Check for auth_success: true in the given zeek flow - """ - original_ssh_flow = self.db.search_tws_for_flow( - profileid, twid, flow.uid + return json.loads(original_ssh_flow[original_flow_uid]) + + def set_evidence_ssh_successful_by_zeek( + self, twid, conn_log_flow, ssh_flow + ): + daddr = conn_log_flow["daddr"] + saddr = conn_log_flow["saddr"] + size = conn_log_flow["sbytes"] + conn_log_flow["dbytes"] + self.set_evidence.ssh_successful( + twid, + saddr, + daddr, + size, + ssh_flow.uid, + ssh_flow.starttime, + by="Zeek", ) - original_flow_uid = next(iter(original_ssh_flow)) - if original_ssh_flow[original_flow_uid]: - ssh_flow_dict = json.loads(original_ssh_flow[original_flow_uid]) - daddr = ssh_flow_dict["daddr"] - saddr = ssh_flow_dict["saddr"] - size = ssh_flow_dict["sbytes"] + ssh_flow_dict["dbytes"] - self.set_evidence.ssh_successful( - twid, - saddr, - daddr, - size, - flow.uid, - flow.starttime, - by="Zeek", - ) - with contextlib.suppress(ValueError): - self.connections_checked_in_ssh_timer_thread.remove(flow.uid) - return True - - elif flow.uid not in self.connections_checked_in_ssh_timer_thread: - # It can happen that the original SSH flow is not in the DB yet - # comes here if we 
haven't started the timer thread - # for this connection before - # mark this connection as checked - # self.print(f'Starting the timer to check on {flow_dict}, - # uid {uid}. time {datetime.datetime.now()}') - self.connections_checked_in_ssh_timer_thread.append(flow.uid) - params = [flow.uid, flow.starttime, profileid, twid] - timer = TimerThread(15, self.detect_successful_ssh_by_zeek, params) - timer.start() - - def check_successful_ssh(self, profileid, twid, flow): + return True + + async def check_successful_ssh(self, twid, flow): """ Function to check if an SSH connection logged in successfully """ + # this is the ssh flow read from conn.log not ssh.log + conn_log_flow = self.get_original_ssh_flow(flow) + + if not conn_log_flow: + await asyncio.sleep(15) + conn_log_flow = self.get_original_ssh_flow(flow) + if not conn_log_flow: + return + # it's true in zeek json files, T in zeke tab files if flow.auth_success in ["true", "T"]: - self.detect_successful_ssh_by_zeek(profileid, twid, flow) + self.set_evidence_ssh_successful_by_zeek(twid, conn_log_flow, flow) else: - self.detect_successful_ssh_by_slips(profileid, twid, flow) + self.detect_successful_ssh_by_slips(twid, conn_log_flow, flow) def check_ssh_password_guessing(self, profileid, twid, flow): """ @@ -156,5 +134,5 @@ def analyze(self, msg): twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - self.check_successful_ssh(profileid, twid, flow) + self.check_successful_ssh(twid, flow) self.check_ssh_password_guessing(profileid, twid, flow) From e2dc41584cac4f8993e2bb84dba70f26ee3f4eed Mon Sep 17 00:00:00 2001 From: alya Date: Thu, 26 Sep 2024 22:50:06 +0300 Subject: [PATCH 27/44] update ssh unit tests --- modules/flowalerts/ssl.py | 3 ++- modules/flowalerts/timer_thread.py | 2 +- tests/test_ssh.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index bac0c2ff4..d4d6c60ba 100644 --- a/modules/flowalerts/ssl.py 
+++ b/modules/flowalerts/ssl.py @@ -42,7 +42,8 @@ def read_configuration(self): def wait_for_ssl_flows_to_appear_in_connlog(self): """ - thread that waits forever for ssl flows to appear in conn.log + thread that waits forever(as long as flowalerts is receiving new + msgs) for ssl flows to appear in conn.log whenever the conn.log flow of an ssl flow is found, thread calls check_pastebin_download ssl flows to wait for are stored in pending_ssl_flows diff --git a/modules/flowalerts/timer_thread.py b/modules/flowalerts/timer_thread.py index ed4a150e8..423ed71a3 100644 --- a/modules/flowalerts/timer_thread.py +++ b/modules/flowalerts/timer_thread.py @@ -17,7 +17,7 @@ def shutdown(self): def run(self): try: - if self._finished.isSet(): + if self._finished.is_set(): return True # sleep for interval or until shutdown diff --git a/tests/test_ssh.py b/tests/test_ssh.py index d3f238583..116734eae 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -198,7 +198,7 @@ def test_detect_successful_ssh_by_zeek(): ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) ssh.set_evidence = MagicMock() ssh.connections_checked_in_ssh_timer_thread = [] - assert ssh.detect_successful_ssh_by_zeek(profileid, twid, flow) + assert ssh.set_evidence_ssh_successful_by_zeek(twid, flow) ssh.set_evidence.ssh_successful.assert_called_once_with( twid, flow_data["saddr"], @@ -248,7 +248,7 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_success(): host_key_alg="", host_key="", ) - result = ssh.detect_successful_ssh_by_zeek("profileid", "twid", flow) + result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) expected_result = True assert result == expected_result @@ -298,7 +298,7 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): host_key_alg="", host_key="", ) - result = ssh.detect_successful_ssh_by_zeek("profileid", "twid", flow) + result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) expected_result = True assert result == expected_result From 
94de7f58d00195483aaf379344499bdeabb416dc Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 14:48:09 +0300 Subject: [PATCH 28/44] module: add handlers for async main() and shutdown_gracefully() functions in flowalerts --- slips_files/common/abstracts/module.py | 67 ++++++++++++++++---------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index fcdec3dbe..c051880db 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -1,17 +1,21 @@ +import asyncio +import inspect import sys import traceback +import warnings from abc import ABC, abstractmethod from multiprocessing import Process, Event from typing import ( Dict, Optional, ) - from slips_files.common.printer import Printer from slips_files.core.output import Output from slips_files.common.slips_utils import utils from slips_files.core.database.database_manager import DBManager +warnings.filterwarnings("ignore", category=RuntimeWarning) + class IModule(ABC, Process): """ @@ -47,27 +51,13 @@ def __init__( # set its own channels # tracks whether or not in the last iteration there was a msg # received in that channel - self.channel_tracker: Dict[str, dict] = self.init_channel_tracker() + self.channel_tracker: Dict[str, Dict[str, bool]] + self.channel_tracker = self.init_channel_tracker() def print(self, *args, **kwargs): return self.printer.print(*args, **kwargs) - @property - @abstractmethod - def name(self): - pass - - @property - @abstractmethod - def description(self): - pass - - @property - @abstractmethod - def authors(self): - pass - - def init_channel_tracker(self) -> Dict[str, bool]: + def init_channel_tracker(self) -> Dict[str, Dict[str, bool]]: """ tracks if in the last loop, a msg was received in any of the subscribed channels or not @@ -137,7 +127,7 @@ def main(self): here will be executed in a loop """ - def pre_main(self): + def pre_main(self) -> bool: """ 
This function is for initializations that are executed once before the main loop @@ -157,6 +147,34 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) + def run_shutdown_gracefully(self): + """ + some modules use async functions like flowalerts, + the goals of this function is to make sure that async and normal + shutdown_gracefully() functions run until completion + """ + if inspect.iscoroutinefunction(self.shutdown_gracefully): + loop = asyncio.get_event_loop() + # Ensure shutdown is completed + loop.run_until_complete(self.shutdown_gracefully()) + return + else: + self.shutdown_gracefully() + return + + def run_main(self): + """ + some modules use async functions like flowalerts, + the goals of this function is to make sure that async and normal + shutdown_gracefully() functions run until completion + """ + if inspect.iscoroutinefunction(self.shutdown_gracefully): + loop = asyncio.get_event_loop() + # Ensure shutdown is completed + return loop.run_until_complete(self.main()) + else: + return self.main() + def run(self): """ This is the loop function, it runs non-stop as long as @@ -165,10 +183,10 @@ def run(self): try: error: bool = self.pre_main() if error or self.should_stop(): - self.shutdown_gracefully() + self.run_shutdown_gracefully() return except KeyboardInterrupt: - self.shutdown_gracefully() + self.run_shutdown_gracefully() return except Exception: self.print_traceback() @@ -177,15 +195,14 @@ def run(self): while True: try: if self.should_stop(): - self.shutdown_gracefully() + self.run_shutdown_gracefully() return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately - error: bool = self.main() + error: bool = self.run_main() if error: - self.shutdown_gracefully() - return + self.run_shutdown_gracefully() except KeyboardInterrupt: self.keyboard_int_ctr += 1 From 641231c42710f6d4e6e923724f51dfb5bf32c62e Mon Sep 17 00:00:00 2001 
From: alya Date: Fri, 27 Sep 2024 15:22:26 +0300 Subject: [PATCH 29/44] zeek.py: fix problem parsing FTP flows --- slips_files/core/input_profilers/zeek.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/slips_files/core/input_profilers/zeek.py b/slips_files/core/input_profilers/zeek.py index 711079f41..44249cb12 100644 --- a/slips_files/core/input_profilers/zeek.py +++ b/slips_files/core/input_profilers/zeek.py @@ -162,7 +162,7 @@ def process_line(self, new_line: dict): elif "ftp" in file_type: self.flow: FTP = FTP( starttime, - line.get("uids", []), + line.get("uid", []), line.get("id.orig_h", ""), line.get("id.resp_h", ""), line.get("data_channel.resp_p", False), From 235f58ccd7c75a586085427043414591b6db6ae6 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 16:50:55 +0300 Subject: [PATCH 30/44] http: use the original conn.log flow of the weird.log in set_evidence_weird_http_method() --- modules/http_analyzer/http_analyzer.py | 36 +++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 3fb995689..907f6785c 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -1,9 +1,14 @@ +import asyncio import json import urllib from uuid import uuid4 import requests -from typing import Union, Dict +from typing import ( + Union, + Dict, + Optional, +) from slips_files.common.flow_classifier import FlowClassifier from slips_files.common.parsers.config_parser import ConfigParser @@ -582,21 +587,26 @@ def check_pastebin_downloads(self, twid, flow): self.db.set_evidence(evidence) return True - def set_evidence_weird_http_method(self, twid: str, flow) -> None: + def set_evidence_weird_http_method( + self, twid: str, weird_method, flow: dict + ) -> None: confidence = 0.9 threat_level: ThreatLevel = ThreatLevel.MEDIUM - attacker: Attacker = Attacker( - direction=Direction.SRC,
attacker_type=IoCType.IP, value=flow.saddr + direction=Direction.SRC, + attacker_type=IoCType.IP, + value=flow["saddr"], ) victim: Victim = Victim( - direction=Direction.DST, victim_type=IoCType.IP, value=flow.daddr + direction=Direction.DST, + victim_type=IoCType.IP, + value=flow["daddr"], ) description: str = ( - f'Weird HTTP method "{flow.addl}" to IP: ' - f"{flow.daddr}. by Zeek." + f"Weird HTTP method {weird_method} to IP: " + f'{flow["daddr"]}. by Zeek.' ) twid_number: int = int(twid.replace("timewindow", "")) @@ -616,7 +626,7 @@ def set_evidence_weird_http_method(self, twid: str, flow) -> None: self.db.set_evidence(evidence) - def check_weird_http_method(self, msg: Dict[str, str]): + async def check_weird_http_method(self, msg: Dict[str, str]): """ detect weird http methods in zeek's weird.log """ @@ -626,7 +636,15 @@ def check_weird_http_method(self, msg: Dict[str, str]): if "unknown_HTTP_method" not in flow.name: return False - self.set_evidence_weird_http_method(twid, flow) + conn_log_flow: Optional[dict] + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + if not conn_log_flow: + await asyncio.sleep(15) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + if not conn_log_flow: + return + + self.set_evidence_weird_http_method(twid, flow.addl, conn_log_flow) def pre_main(self): utils.drop_root_privs() From e621eb92972b225750ccbdb1fb6fc80873f7b1dc Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 16:53:19 +0300 Subject: [PATCH 31/44] module: use the main event loop to run async functions --- slips_files/common/abstracts/module.py | 94 +++++++++++++++++--------- 1 file changed, 62 insertions(+), 32 deletions(-) diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index c051880db..a531d0fca 100644 --- a/slips_files/common/abstracts/module.py +++ b/slips_files/common/abstracts/module.py @@ -1,5 +1,4 @@ import asyncio -import inspect import sys import traceback import warnings @@ 
-8,6 +7,7 @@ from typing import ( Dict, Optional, + Callable, ) from slips_files.common.printer import Printer from slips_files.core.output import Output @@ -72,9 +72,7 @@ def init_channel_tracker(self) -> Dict[str, Dict[str, bool]]: """ tracker = {} for channel_name in self.channels: - tracker[channel_name] = { - "msg_received": False, - } + tracker[channel_name] = {"msg_received": False} return tracker @abstractmethod @@ -147,62 +145,94 @@ def print_traceback(self): self.print(f"Problem in pre_main() line {exception_line}", 0, 1) self.print(traceback.format_exc(), 0, 1) - def run_shutdown_gracefully(self): + def run(self): """ some modules use async functions like flowalerts, the goals of this function is to make sure that async and normal shutdown_gracefully() functions run until completion """ - if inspect.iscoroutinefunction(self.shutdown_gracefully): - loop = asyncio.get_event_loop() - # Ensure shutdown is completed - loop.run_until_complete(self.shutdown_gracefully()) - return - else: + try: + error: bool = self.pre_main() + if error or self.should_stop(): + self.shutdown_gracefully() + return + except KeyboardInterrupt: self.shutdown_gracefully() return + except Exception: + self.print_traceback() + return - def run_main(self): - """ - some modules use async functions like flowalerts, - the goals of this function is to make sure that async and normal - shutdown_gracefully() functions run until completion - """ - if inspect.iscoroutinefunction(self.shutdown_gracefully): - loop = asyncio.get_event_loop() - # Ensure shutdown is completed - return loop.run_until_complete(self.main()) - else: - return self.main() + while True: + try: + if self.should_stop(): + self.shutdown_gracefully() + return + + error: bool = self.main() + if error: + self.shutdown_gracefully() + except KeyboardInterrupt: + keyboard_int_ctr += 1 + if keyboard_int_ctr >= 2: + return + continue + except Exception: + self.print_traceback() + return + + +class AsyncModule(IModule, ABC, Process): 
+ """ + An abstract class for asynchronous slips modules + """ + + name = "abstract class" + + def __init__(self, *args, **kwargs): + IModule.__init__(self, *args, **kwargs) + + def init(self, **kwargs): ... + + async def main(self): ... + + async def shutdown_gracefully(self): + """Implement the async shutdown logic here""" + pass + + async def run_main(self): + return await self.main() + + def run_async_function(self, func: Callable): + loop = asyncio.get_event_loop() + return loop.run_until_complete(func()) def run(self): - """ - This is the loop function, it runs non-stop as long as - the module is running - """ try: error: bool = self.pre_main() if error or self.should_stop(): - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return except KeyboardInterrupt: - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return except Exception: self.print_traceback() return + keyboard_int_ctr = 0 while True: try: if self.should_stop(): - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) return # if a module's main() returns 1, it means there's an # error and it needs to stop immediately - error: bool = self.run_main() + error: bool = self.run_async_function(self.run_main) if error: - self.run_shutdown_gracefully() + self.run_async_function(self.shutdown_gracefully) + return except KeyboardInterrupt: self.keyboard_int_ctr += 1 From f54a0e44d150e9c08c992d56b85e7bd246e3b538 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:10:24 +0300 Subject: [PATCH 32/44] module: move asyncmodule class to a separate file --- slips_files/common/abstracts/async_module.py | 70 ++++++++++++++++++++ slips_files/common/abstracts/module.py | 11 +-- 2 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 slips_files/common/abstracts/async_module.py diff --git a/slips_files/common/abstracts/async_module.py b/slips_files/common/abstracts/async_module.py new file mode 100644 
index 000000000..92f5c7a43 --- /dev/null +++ b/slips_files/common/abstracts/async_module.py @@ -0,0 +1,70 @@ +import asyncio +from abc import ABC +from multiprocessing import Process +from typing import Callable +from slips_files.common.abstracts.module import IModule + + +class AsyncModule(IModule, ABC, Process): + """ + An abstract class for asynchronous slips modules + """ + + name = "Async Module Abstract Class" + + def __init__(self, *args, **kwargs): + IModule.__init__(self, *args, **kwargs) + + def init(self, **kwargs): ... + + async def main(self): ... + + async def shutdown_gracefully(self): + """Implement the async shutdown logic here""" + pass + + async def run_main(self): + return await self.main() + + def run_async_function(self, func: Callable): + loop = asyncio.get_event_loop() + return loop.run_until_complete(func()) + + def run(self): + try: + error: bool = self.pre_main() + if error or self.should_stop(): + self.run_async_function(self.shutdown_gracefully) + return + except KeyboardInterrupt: + self.run_async_function(self.shutdown_gracefully) + return + except Exception: + self.print_traceback() + return + + keyboard_int_ctr = 0 + while True: + try: + if self.should_stop(): + self.run_async_function(self.shutdown_gracefully) + return + + # if a module's main() returns 1, it means there's an + # error and it needs to stop immediately + error: bool = self.run_async_function(self.run_main) + if error: + self.run_async_function(self.shutdown_gracefully) + return + + except KeyboardInterrupt: + keyboard_int_ctr += 1 + if keyboard_int_ctr >= 2: + # on the second ctrl+c Slips immediately stop + return + # on the first ctrl + C keep looping until the should_stop() + # returns true + continue + except Exception: + self.print_traceback() + return diff --git a/slips_files/common/abstracts/module.py b/slips_files/common/abstracts/module.py index a531d0fca..e9ef7b43e 100644 --- a/slips_files/common/abstracts/module.py +++ 
b/slips_files/common/abstracts/module.py @@ -1,4 +1,3 @@ -import asyncio import sys import traceback import warnings @@ -7,7 +6,6 @@ from typing import ( Dict, Optional, - Callable, ) from slips_files.common.printer import Printer from slips_files.core.output import Output @@ -173,8 +171,8 @@ def run(self): if error: self.shutdown_gracefully() except KeyboardInterrupt: - keyboard_int_ctr += 1 - if keyboard_int_ctr >= 2: + self.keyboard_int_ctr += 1 + if self.keyboard_int_ctr >= 2: return continue except Exception: @@ -220,7 +218,6 @@ def run(self): self.print_traceback() return - keyboard_int_ctr = 0 while True: try: if self.should_stop(): @@ -236,11 +233,9 @@ def run(self): except KeyboardInterrupt: self.keyboard_int_ctr += 1 - if self.keyboard_int_ctr >= 2: - # on the second ctrl+c the module immediately stops + # on the second ctrl+c Slips immediately stop return True - # on the first ctrl + C keep looping until the should_stop() # returns true continue From 75c70afe22a1293a1f961a0011f6af00c118c8d3 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:13:19 +0300 Subject: [PATCH 33/44] flowalerts: implement asyncmodule instead of IModule --- managers/process_manager.py | 4 +++- modules/flowalerts/flowalerts.py | 33 ++++++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 7 deletions(-) diff --git a/managers/process_manager.py b/managers/process_manager.py index 5a4192245..d5d475da0 100644 --- a/managers/process_manager.py +++ b/managers/process_manager.py @@ -581,7 +581,9 @@ def should_run_non_stop(self) -> bool: return True return False - def shutdown_interactive(self, to_kill_first, to_kill_last): + def shutdown_interactive( + self, to_kill_first, to_kill_last + ) -> Tuple[List[Process], List[Process]]: """ Shuts down modules in interactive mode only. 
it won't work with the daemon's -S because the diff --git a/modules/flowalerts/flowalerts.py b/modules/flowalerts/flowalerts.py index 6f0c57d46..ba6aa74c1 100644 --- a/modules/flowalerts/flowalerts.py +++ b/modules/flowalerts/flowalerts.py @@ -1,5 +1,10 @@ +import asyncio +import inspect +from asyncio import Task +from typing import List + from slips_files.common.slips_utils import utils -from slips_files.common.abstracts.module import IModule +from slips_files.common.abstracts.async_module import AsyncModule from .conn import Conn from .dns import DNS from .downloaded_file import DownloadedFile @@ -8,12 +13,11 @@ from .software import Software from .ssh import SSH from .ssl import SSL -from slips_files.core.helpers.whitelist.whitelist import Whitelist - from .tunnel import Tunnel +from slips_files.core.helpers.whitelist.whitelist import Whitelist -class FlowAlerts(IModule): +class FlowAlerts(AsyncModule): name = "Flow Alerts" description = ( "Alerts about flows: long connection, successful ssh, " @@ -33,6 +37,8 @@ def init(self): self.downloaded_file = DownloadedFile(self.db, flowalerts=self) self.tunnel = Tunnel(self.db, flowalerts=self) self.conn = Conn(self.db, flowalerts=self) + # list of async functions to await before flowalerts shuts down + self.tasks: List[Task] = [] def subscribe_to_channels(self): channels = ( @@ -51,6 +57,9 @@ def subscribe_to_channels(self): channel_obj = self.db.subscribe(channel) self.channels.update({channel: channel_obj}) + async def shutdown_gracefully(self): + await asyncio.gather(*self.tasks) + def pre_main(self): utils.drop_root_privs() self.analyzers_map = { @@ -66,11 +75,23 @@ def pre_main(self): "new_ssl": [self.ssl.analyze], } - def main(self): + async def main(self): for channel, analyzers in self.analyzers_map.items(): msg: dict = self.get_msg(channel) if not msg: continue for analyzer in analyzers: - analyzer(msg) + # some analyzers are async functions + if inspect.iscoroutinefunction(analyzer): + # analyzer will run 
normally, until it finishes. + # tasks inside this analyzer will run asynchrously, + # and finish whenever they finish, we'll not wait for them + loop = asyncio.get_event_loop() + task = loop.create_task(analyzer(msg)) + # to wait for these functions before flowalerts shuts down + self.tasks.append(task) + # Allow the event loop to run the scheduled task + await asyncio.sleep(0) + else: + analyzer(msg) From c4dbd113d8062b0be6c9cb7d55f4accf18eee43a Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:14:12 +0300 Subject: [PATCH 34/44] ssl: use async instead of threads to wait for ssl conn.log flows --- modules/flowalerts/ssl.py | 97 ++++++++------------------------------- 1 file changed, 18 insertions(+), 79 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index d4d6c60ba..66b5e82df 100644 --- a/modules/flowalerts/ssl.py +++ b/modules/flowalerts/ssl.py @@ -1,11 +1,7 @@ +import asyncio import json -import multiprocessing -import threading -import time from typing import ( - Tuple, Union, - Dict, ) from slips_files.common.abstracts.flowalerts_analyzer import ( @@ -21,15 +17,6 @@ class SSL(IFlowalertsAnalyzer): def init(self): self.classifier = FlowClassifier() - # in pastebin download detection, we wait for each conn.log flow - # of the seen ssl flow to appear - # this is the dict of ssl flows we're waiting for - self.pending_ssl_flows = multiprocessing.Queue() - # thread that waits for ssl flows to appear in conn.log - self.ssl_thread_started = False - self.ssl_waiting_thread = threading.Thread( - target=self.wait_for_ssl_flows_to_appear_in_connlog, daemon=True - ) def name(self) -> str: return "ssl_analyzer" @@ -40,72 +27,26 @@ def read_configuration(self): conf.get_pastebin_download_threshold() ) - def wait_for_ssl_flows_to_appear_in_connlog(self): - """ - thread that waits forever(as long as flowalerts is receiving new - msgs) for ssl flows to appear in conn.log - whenever the conn.log flow of an ssl flow is found, thread 
calls - check_pastebin_download - ssl flows to wait for are stored in pending_ssl_flows - """ - # this is the time we give ssl flows to appear in conn.log, - # when this time is over, we check, then wait again, etc. - wait_time = 60 * 2 - - # this thread shouldn't run on interface only because in zeek dirs we - # we should wait for the conn.log to be read too - - while not self.flowalerts.should_stop(): - size = self.pending_ssl_flows.qsize() - if size == 0: - # nothing in queue - time.sleep(30) - continue - # try to get the conn of each pending flow only once - # this is to ensure that re-added flows to the queue aren't checked twice - for ssl_flow in range(size): - try: - ssl_flow: Tuple[Union[SSL, SuricataTLS], str, str] = ( - self.pending_ssl_flows.get(timeout=0.5) - ) - except Exception: - continue - - flow, profileid, twid = ssl_flow - # get the conn.log with the same uid, - # returns {uid: {actual flow..}} - # always returns a dict, never returns None - conn_log_flow: Dict[str, str] = self.db.get_flow(flow.uid) - if conn_log_flow := conn_log_flow.get(flow.uid): - conn_log_flow: dict = json.loads(conn_log_flow) - if "starttime" in conn_log_flow: - # this means the flow is found in conn.log - self.check_pastebin_download(flow, conn_log_flow, twid) - else: - # flow not found in conn.log yet, - # re-add it to the queue to check it later - self.pending_ssl_flows.put((flow, profileid, twid)) - - # give the ssl flows remaining in self.pending_ssl_flows - # 2 more mins to appear - time.sleep(wait_time) - - def check_pastebin_download( + async def check_pastebin_download( self, - ssl_flow: Union[SSL, SuricataTLS], - conn_log_flow: Dict[str, str], twid: str, + ssl_flow: Union[SSL, SuricataTLS], ): """ Alerts on downloads from pastebin.com with more than 12000 bytes This function waits for the ssl.log flow to appear - in conn.log before alerting - : param flow: this is the conn.log of the ssl flow - we're currently checking + in conn.log for 40s before alerting """ if 
"pastebin" not in ssl_flow.server_name: return False + conn_log_flow = self.utils.get_original_conn_flow(ssl_flow, self.db) + if not conn_log_flow: + await asyncio.sleep(40) + conn_log_flow = utils.get_original_conn_flow(ssl_flow, self.db) + if not conn_log_flow: + return False + # orig_bytes is number of payload bytes downloaded downloaded_bytes = conn_log_flow["resp_bytes"] if downloaded_bytes >= self.pastebin_downloads_threshold: @@ -203,20 +144,18 @@ def detect_doh(self, twid, flow): self.set_evidence.doh(twid, flow) self.db.set_ip_info(flow.daddr, {"is_doh_server": True}) - def analyze(self, msg: dict): - if not self.ssl_thread_started: - self.ssl_waiting_thread.start() - self.ssl_thread_started = True - + async def analyze(self, msg: dict): if utils.is_msg_intended_for(msg, "new_ssl"): msg = json.loads(msg["data"]) profileid = msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - # we'll be checking pastebin downloads of this ssl flow - # later - # todo: can i put ssl flow obj in the queue?? 
- self.pending_ssl_flows.put((flow, profileid, twid)) + + task = asyncio.create_task( + self.check_pastebin_download(twid, flow) + ) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_self_signed_certs(twid, flow) self.detect_malicious_ja3(twid, flow) From 70ab33c8595b2d856cfb01abd167e58a7a53b237 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:15:32 +0300 Subject: [PATCH 35/44] move get_original_conn_flow() to utils --- modules/flowalerts/ssh.py | 17 +++++------------ slips_files/common/slips_utils.py | 7 +++++++ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index cadd7fdc9..fe56372b5 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -1,6 +1,5 @@ import asyncio import json -from typing import Optional from slips_files.common.abstracts.flowalerts_analyzer import ( IFlowalertsAnalyzer, @@ -57,13 +56,6 @@ def detect_successful_ssh_by_slips( ) return True - def get_original_ssh_flow(self, flow) -> Optional[dict]: - """original ssh flows are the ones from conn.log""" - original_ssh_flow = self.db.get_flow(flow.uid) - original_flow_uid = next(iter(original_ssh_flow)) - if original_ssh_flow[original_flow_uid]: - return json.loads(original_ssh_flow[original_flow_uid]) - def set_evidence_ssh_successful_by_zeek( self, twid, conn_log_flow, ssh_flow ): @@ -86,11 +78,11 @@ async def check_successful_ssh(self, twid, flow): Function to check if an SSH connection logged in successfully """ # this is the ssh flow read from conn.log not ssh.log - conn_log_flow = self.get_original_ssh_flow(flow) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.get_original_ssh_flow(flow) + conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return @@ -133,6 +125,7 @@ def analyze(self, msg): profileid = 
msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - - self.check_successful_ssh(twid, flow) + task = asyncio.create_task(self.check_successful_ssh(twid, flow)) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_ssh_password_guessing(profileid, twid, flow) diff --git a/slips_files/common/slips_utils.py b/slips_files/common/slips_utils.py index 4feec94fb..3e60bdaa5 100644 --- a/slips_files/common/slips_utils.py +++ b/slips_files/common/slips_utils.py @@ -114,6 +114,13 @@ def threat_level_to_string(self, threat_level: float) -> str: def is_valid_threat_level(self, threat_level): return threat_level in self.threat_levels + def get_original_conn_flow(self, altflow, db) -> Optional[dict]: + """Returns the original conn.log of the given altflow""" + original_conn_flow = db.get_flow(altflow.uid) + original_flow_uid = next(iter(original_conn_flow)) + if original_conn_flow[original_flow_uid]: + return json.loads(original_conn_flow[original_flow_uid]) + def sanitize(self, input_string): """ Sanitize strings taken from the user From 5dff960459bcd267934f42dfc6e8ae901e055377 Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:16:49 +0300 Subject: [PATCH 36/44] make analyze() asynchronous in conn.py and dns.py --- modules/flowalerts/conn.py | 11 ++++++++--- modules/flowalerts/dns.py | 10 ++++++++-- slips_files/core/database/sqlite_db/database.py | 3 ++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/flowalerts/conn.py b/modules/flowalerts/conn.py index ca8a0c91c..b908783e2 100644 --- a/modules/flowalerts/conn.py +++ b/modules/flowalerts/conn.py @@ -425,7 +425,6 @@ async def check_connection_without_dns_resolution( # The exceptions are: # 1- Do not check for DNS requests # 2- Ignore some IPs like private IPs, multicast, and broadcast - if self.should_ignore_conn_without_dns(flow): return False @@ -722,7 +721,7 @@ def is_dns_conn(flow): 
self.set_evidence.conn_to_private_ip(twid, flow) - def analyze(self, msg): + async def analyze(self, msg): if utils.is_msg_intended_for(msg, "new_flow"): msg = json.loads(msg["data"]) profileid = msg["profileid"] @@ -741,7 +740,13 @@ def analyze(self, msg): self.check_different_localnet_usage( twid, flow, what_to_check="srcip" ) - self.check_connection_without_dns_resolution(profileid, twid, flow) + task = asyncio.create_task( + self.check_connection_without_dns_resolution( + profileid, twid, flow + ) + ) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.detect_connection_to_multiple_ports(profileid, twid, flow) self.check_data_upload(profileid, twid, flow) self.check_non_http_port_80_conns(twid, flow) diff --git a/modules/flowalerts/dns.py b/modules/flowalerts/dns.py index 0027ddf31..30f889f83 100644 --- a/modules/flowalerts/dns.py +++ b/modules/flowalerts/dns.py @@ -394,14 +394,20 @@ def check_dns_arpa_scan(self, profileid, twid, flow): self.dns_arpa_queries.pop(profileid) return True - def analyze(self, msg): + async def analyze(self, msg): if not utils.is_msg_intended_for(msg, "new_dns"): return False msg = json.loads(msg["data"]) profileid = msg["profileid"] twid = msg["twid"] flow = self.classifier.convert_to_flow_obj(msg["flow"]) - self.check_dns_without_connection(profileid, twid, flow) + task = asyncio.create_task( + self.check_dns_without_connection(profileid, twid, flow) + ) + # Allow the event loop to run the scheduled task + await asyncio.sleep(0) + # to wait for these functions before flowalerts shuts down + self.flowalerts.tasks.append(task) self.check_high_entropy_dns_answers(twid, flow) self.check_invalid_dns_answers(twid, flow) self.detect_dga(profileid, twid, flow) diff --git a/slips_files/core/database/sqlite_db/database.py b/slips_files/core/database/sqlite_db/database.py index 67f40fe09..b153ea7c8 100644 --- a/slips_files/core/database/sqlite_db/database.py +++ 
b/slips_files/core/database/sqlite_db/database.py @@ -401,7 +401,8 @@ def execute(self, query, params=None): self.trial = 0 # discard query self.print( - f"Error executing query: {query} - {e}. Query discarded", + f"Error executing query: {query} {params}- {e}. " + f"Query discarded", 0, 1, ) From ae870df4f0e0f549c1ec0ec45d2d6da9f6ace3fb Mon Sep 17 00:00:00 2001 From: alya Date: Fri, 27 Sep 2024 17:30:10 +0300 Subject: [PATCH 37/44] update set_evidence_weird_http_method() unit test --- modules/http_analyzer/http_analyzer.py | 13 +++++++------ tests/test_http_analyzer.py | 26 ++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 907f6785c..8164c8903 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -14,6 +14,7 @@ from slips_files.common.parsers.config_parser import ConfigParser from slips_files.common.slips_utils import utils from slips_files.common.abstracts.module import IModule +from slips_files.core.flows.zeek import Weird from slips_files.core.structures.evidence import ( Evidence, ProfileID, @@ -588,7 +589,7 @@ def check_pastebin_downloads(self, twid, flow): return True def set_evidence_weird_http_method( - self, twid: str, weird_method, flow: dict + self, twid: str, weird_flow: Weird, flow: dict ) -> None: confidence = 0.9 threat_level: ThreatLevel = ThreatLevel.MEDIUM @@ -605,7 +606,7 @@ def set_evidence_weird_http_method( ) description: str = ( - f"Weird HTTP method {weird_method} to IP: " + f"Weird HTTP method {weird_flow.addl} to IP: " f'{flow["daddr"]}. by Zeek.' 
) @@ -617,10 +618,10 @@ def set_evidence_weird_http_method( victim=victim, threat_level=threat_level, description=description, - profile=ProfileID(ip=flow.saddr), + profile=ProfileID(ip=flow["saddr"]), timewindow=TimeWindow(number=twid_number), - uid=[flow.uid], - timestamp=flow.starttime, + uid=[flow["uid"]], + timestamp=weird_flow.starttime, confidence=confidence, ) @@ -644,7 +645,7 @@ async def check_weird_http_method(self, msg: Dict[str, str]): if not conn_log_flow: return - self.set_evidence_weird_http_method(twid, flow.addl, conn_log_flow) + self.set_evidence_weird_http_method(twid, flow, conn_log_flow) def pre_main(self): utils.drop_root_privs() diff --git a/tests/test_http_analyzer.py b/tests/test_http_analyzer.py index 6ce303c80..893ec01e0 100644 --- a/tests/test_http_analyzer.py +++ b/tests/test_http_analyzer.py @@ -6,6 +6,7 @@ from slips_files.core.flows.zeek import ( HTTP, Weird, + Conn, ) from tests.module_factory import ModuleFactory from unittest.mock import patch, MagicMock @@ -325,7 +326,7 @@ def test_set_evidence_weird_http_method(mocker): "Some IP identification" ) mocker.spy(http_analyzer.db, "set_evidence") - flow = Weird( + weird_flow = Weird( starttime="1726593782.8840969", uid="123", saddr="192.168.1.5", @@ -333,7 +334,28 @@ def test_set_evidence_weird_http_method(mocker): name="", addl="weird_method_here", ) - http_analyzer.set_evidence_weird_http_method(twid, flow) + conn_flow = Conn( + starttime="1726249372.312124", + uid="123", + saddr="192.168.1.1", + daddr="1.1.1.1", + dur=1, + proto="tcp", + appproto="", + sport="0", + dport="12345", + spkts=0, + dpkts=0, + sbytes=0, + dbytes=0, + smac="", + dmac="", + state="Established", + history="", + ) + http_analyzer.set_evidence_weird_http_method( + twid, weird_flow, asdict(conn_flow) + ) http_analyzer.db.set_evidence.assert_called_once() From 49cfb8b2b53eb06ec3d7487ec2541e7536a04610 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 14:35:18 +0300 Subject: [PATCH 38/44] update http unit 
tests --- modules/http_analyzer/http_analyzer.py | 14 +++++--------- pytest.ini | 12 ++++++++++++ tests/test_http_analyzer.py | 19 ++++++++++++++----- 3 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 pytest.ini diff --git a/modules/http_analyzer/http_analyzer.py b/modules/http_analyzer/http_analyzer.py index 8164c8903..4c8218135 100644 --- a/modules/http_analyzer/http_analyzer.py +++ b/modules/http_analyzer/http_analyzer.py @@ -196,9 +196,7 @@ def check_multiple_empty_connections(self, twid: str, flow): self.connections_counter[host] = ([], 0) return True - def set_evidence_incompatible_user_agent( - self, profileid, twid, flow, user_agent - ): + def set_evidence_incompatible_user_agent(self, twid, flow, user_agent): os_type: str = user_agent.get("os_type", "").lower() os_name: str = user_agent.get("os_name", "").lower() @@ -300,9 +298,7 @@ def check_incompatible_user_agent(self, profileid, twid, flow): browser = user_agent.get("browser", "").lower() # user_agent = user_agent.get('user_agent', '') if "safari" in browser and "apple" not in vendor: - self.set_evidence_incompatible_user_agent( - profileid, twid, flow, user_agent - ) + self.set_evidence_incompatible_user_agent(twid, flow, user_agent) return True # make sure all of them are lowercase @@ -344,7 +340,7 @@ def check_incompatible_user_agent(self, profileid, twid, flow): # [('microsoft', 'windows', 'NT'), ('android'), ('linux')] # is found in the UA that belongs to an apple device self.set_evidence_incompatible_user_agent( - profileid, twid, flow, user_agent + twid, flow, user_agent ) return True @@ -638,10 +634,10 @@ async def check_weird_http_method(self, msg: Dict[str, str]): return False conn_log_flow: Optional[dict] - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = 
utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..64a304dae --- /dev/null +++ b/pytest.ini @@ -0,0 +1,12 @@ +[pytest] +log_level = INFO +log_cli_level = INFO +log_format = %(asctime)s %(levelname)s %(message)s +log_cli_format = %(asctime)s %(levelname)s %(message)s +log_date_format = %H:%M:%S +log_cli_date_format = %H:%M:%S +addopts = -s -vvv -p no:warnings +# ensures that the appropriate event loop scope is selected automatically +# based on the version of pytest-asyncio you're using. +asyncio_mode = auto +asyncio_default_fixture_loop_scope = function diff --git a/tests/test_http_analyzer.py b/tests/test_http_analyzer.py index 893ec01e0..3071dd111 100644 --- a/tests/test_http_analyzer.py +++ b/tests/test_http_analyzer.py @@ -2,6 +2,7 @@ import json from dataclasses import asdict +import pytest from slips_files.core.flows.zeek import ( HTTP, @@ -9,9 +10,12 @@ Conn, ) from tests.module_factory import ModuleFactory -from unittest.mock import patch, MagicMock +from unittest.mock import ( + patch, + MagicMock, + Mock, +) from modules.http_analyzer.http_analyzer import utils -import pytest import requests # dummy params used for testing @@ -412,8 +416,9 @@ def test_read_configuration_valid(mocker, config_value): ), ], ) -def test_check_weird_http_method(mocker, flow_name, evidence_expected): +async def test_check_weird_http_method(mocker, flow_name, evidence_expected): http_analyzer = ModuleFactory().create_http_analyzer_obj() + http_analyzer.set_evidence_weird_http_method = Mock() mocker.spy(http_analyzer, "set_evidence_weird_http_method") msg = { @@ -424,13 +429,17 @@ def test_check_weird_http_method(mocker, flow_name, evidence_expected): saddr="192.168.1.5", daddr="1.1.1.1", name=flow_name, - addl="weird_method_here", + addl=flow_name, ) ), "twid": twid, } - http_analyzer.check_weird_http_method(msg) + with patch( + 
"slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, {"flow": {}}] + await http_analyzer.check_weird_http_method(msg) if evidence_expected: http_analyzer.set_evidence_weird_http_method.assert_called_once() From 21e0741f60a1ebf3802ac0c0478579473c423c8b Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:08:22 +0300 Subject: [PATCH 39/44] update ssl unit tests --- modules/flowalerts/ssl.py | 2 +- tests/test_ssl.py | 45 +++++++++++++++------------------------ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/modules/flowalerts/ssl.py b/modules/flowalerts/ssl.py index 66b5e82df..2e10937fe 100644 --- a/modules/flowalerts/ssl.py +++ b/modules/flowalerts/ssl.py @@ -40,7 +40,7 @@ async def check_pastebin_download( if "pastebin" not in ssl_flow.server_name: return False - conn_log_flow = self.utils.get_original_conn_flow(ssl_flow, self.db) + conn_log_flow = utils.get_original_conn_flow(ssl_flow, self.db) if not conn_log_flow: await asyncio.sleep(40) conn_log_flow = utils.get_original_conn_flow(ssl_flow, self.db) diff --git a/tests/test_ssl.py b/tests/test_ssl.py index 5365ef3bf..e9c1c2196 100644 --- a/tests/test_ssl.py +++ b/tests/test_ssl.py @@ -1,7 +1,10 @@ """Unit test for modules/flowalerts/ssl.py""" from dataclasses import asdict -from unittest.mock import Mock +from unittest.mock import ( + Mock, + patch, +) from slips_files.core.flows.zeek import ( SSL, @@ -189,7 +192,7 @@ def test_detect_doh(mocker, is_doh, expected_calls): ("www.example.com", 15000, False), ], ) -def test_check_pastebin_download( +async def test_check_pastebin_download( mocker, server_name, downloaded_bytes, @@ -201,7 +204,6 @@ def test_check_pastebin_download( "modules.flowalerts.set_evidence.SetEvidnceHelper.pastebin_download" ) - conn_log_flow = {"resp_bytes": downloaded_bytes} flow = SSL( starttime="1726593782.8840969", uid="123", @@ -224,7 +226,12 @@ def 
test_check_pastebin_download( ja3s="", is_DoH="", ) - ssl.check_pastebin_download(flow, conn_log_flow, twid) + conn_log_flow = {"resp_bytes": downloaded_bytes} + with patch( + "slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, conn_log_flow] + await ssl.check_pastebin_download(twid, flow) assert mock_set_evidence.call_count == expected_call_count @@ -353,11 +360,8 @@ def test_check_non_ssl_port_443_conns( assert mock_set_evidence.call_count == expected_call_count -def test_analyze_new_ssl_msg(mocker): +async def test_analyze_new_ssl_msg(mocker): ssl = ModuleFactory().create_ssl_analyzer_obj() - mock_pending_ssl_flows_put = mocker.patch.object( - ssl.pending_ssl_flows, "put" - ) mock_check_self_signed_certs = mocker.patch.object( ssl, "check_self_signed_certs" ) @@ -403,20 +407,9 @@ def test_analyze_new_ssl_msg(mocker): ), } - ssl.analyze(msg) - - mock_pending_ssl_flows_put.assert_called_once_with( - ( - flow, - "profile_192.168.1.1", - "timewindow1", - ) - ) - + await ssl.analyze(msg) mock_check_self_signed_certs.assert_called_once_with("timewindow1", flow) - mock_detect_malicious_ja3.assert_called_once_with("timewindow1", flow) - mock_detect_incompatible_cn.assert_called_once_with( "profile_192.168.1.1", "timewindow1", flow ) @@ -424,7 +417,7 @@ def test_analyze_new_ssl_msg(mocker): mock_detect_doh.assert_called_once_with("timewindow1", flow) -def test_analyze_new_flow_msg(mocker): +async def test_analyze_new_flow_msg(mocker): ssl = ModuleFactory().create_ssl_analyzer_obj() mock_check_non_ssl_port_443_conns = mocker.patch.object( ssl, "check_non_ssl_port_443_conns" @@ -460,21 +453,18 @@ def test_analyze_new_flow_msg(mocker): ), } - ssl.analyze(msg) + await ssl.analyze(msg) mock_check_non_ssl_port_443_conns.assert_called_once_with( "timewindow1", flow ) -def test_analyze_no_messages( +async def test_analyze_no_messages( mocker, ): ssl = 
ModuleFactory().create_ssl_analyzer_obj() - mock_pending_ssl_flows_put = mocker.patch.object( - ssl.pending_ssl_flows, "put" - ) mock_check_self_signed_certs = mocker.patch.object( ssl, "check_self_signed_certs" ) @@ -489,9 +479,8 @@ def test_analyze_no_messages( ssl, "check_non_ssl_port_443_conns" ) - ssl.analyze({}) + await ssl.analyze({}) - mock_pending_ssl_flows_put.assert_not_called() mock_check_self_signed_certs.assert_not_called() mock_detect_malicious_ja3.assert_not_called() mock_detect_incompatible_cn.assert_not_called() From 978aa2e2abd6bfd8600c8768020ebcf1237c9212 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:17:28 +0300 Subject: [PATCH 40/44] update ssh unit tests --- modules/flowalerts/ssh.py | 6 +++--- tests/module_factory.py | 9 +-------- tests/test_ssh.py | 40 ++++++++++++++++++++++++--------------- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/modules/flowalerts/ssh.py b/modules/flowalerts/ssh.py index fe56372b5..7160e2b34 100644 --- a/modules/flowalerts/ssh.py +++ b/modules/flowalerts/ssh.py @@ -78,11 +78,11 @@ async def check_successful_ssh(self, twid, flow): Function to check if an SSH connection logged in successfully """ # this is the ssh flow read from conn.log not ssh.log - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: await asyncio.sleep(15) - conn_log_flow = self.utils.get_original_conn_flow(flow, self.db) + conn_log_flow = utils.get_original_conn_flow(flow, self.db) if not conn_log_flow: return @@ -117,7 +117,7 @@ def check_ssh_password_guessing(self, profileid, twid, flow): # reset the counter del self.password_guessing_cache[cache_key] - def analyze(self, msg): + async def analyze(self, msg): if not utils.is_msg_intended_for(msg, "new_ssh"): return diff --git a/tests/module_factory.py b/tests/module_factory.py index 903403b71..704fe32a5 100644 --- a/tests/module_factory.py +++ b/tests/module_factory.py 
@@ -217,13 +217,7 @@ def create_smtp_analyzer_obj(self, mock_db): @patch(DB_MANAGER, name="mock_db") def create_ssl_analyzer_obj(self, mock_db): flowalerts = self.create_flowalerts_obj() - with patch( - "modules.flowalerts.ssl.SSL" - ".wait_for_ssl_flows_to_appear_in_connlog", - return_value=Mock(), - ): - ssl = SSL(flowalerts.db, flowalerts=flowalerts) - return ssl + return SSL(flowalerts.db, flowalerts=flowalerts) @patch(DB_MANAGER, name="mock_db") def create_ssh_analyzer_obj(self, mock_db): @@ -608,4 +602,3 @@ def create_riskiq_obj(self, mock_db): termination_event, ) return riskiq - diff --git a/tests/test_ssh.py b/tests/test_ssh.py index 116734eae..0197f3ca4 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -5,9 +5,12 @@ from slips_files.core.flows.zeek import SSH from tests.module_factory import ModuleFactory import json -from unittest.mock import patch +from unittest.mock import ( + patch, +) from unittest.mock import MagicMock import pytest +from tests.common_test_utils import get_mock_coro # dummy params used for testing profileid = "profile_192.168.1.1" @@ -18,7 +21,7 @@ @pytest.mark.parametrize( - "auth_success, expected_called_zeek, expected_called_slips", + "auth_success, expected_zeek_evidence, expected_called_slips", [ # Test case 1: auth_success is 'true' - # should call detect_successful_ssh_by_zeek @@ -37,12 +40,12 @@ ("some_other_value", False, True), ], ) -def test_check_successful_ssh( - mocker, auth_success, expected_called_zeek, expected_called_slips +async def test_check_successful_ssh( + mocker, auth_success, expected_zeek_evidence, expected_called_slips ): ssh = ModuleFactory().create_ssh_analyzer_obj() - mock_detect_zeek = mocker.patch( - "modules.flowalerts.ssh.SSH.detect_successful_ssh_by_zeek" + mock_set_evidence_ssh_successful_by_zeek = mocker.patch( + "modules.flowalerts.ssh.SSH.set_evidence_ssh_successful_by_zeek" ) mock_detect_slips = mocker.patch( "modules.flowalerts.ssh.SSH.detect_successful_ssh_by_slips" @@ -64,9 +67,16 
@@ def test_check_successful_ssh( host_key_alg="", host_key="", ) - ssh.check_successful_ssh(profileid, twid, flow) - - assert mock_detect_zeek.called == expected_called_zeek + with patch( + "slips_files.common.slips_utils.utils.get_original_conn_flow" + ) as mock_get_original_conn_flow: + mock_get_original_conn_flow.side_effect = [None, {"flow": {}}] + await ssh.check_successful_ssh(twid, flow) + + assert ( + mock_set_evidence_ssh_successful_by_zeek.called + == expected_zeek_evidence + ) assert mock_detect_slips.called == expected_called_slips @@ -314,23 +324,23 @@ def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread -def test_analyze_no_message(): +async def test_analyze_no_message(): ssh = ModuleFactory().create_ssh_analyzer_obj() ssh.flowalerts = MagicMock() ssh.flowalerts.get_msg.return_value = None ssh.check_successful_ssh = MagicMock() ssh.check_ssh_password_guessing = MagicMock() - ssh.analyze({}) + await ssh.analyze({}) ssh.check_successful_ssh.assert_not_called() ssh.check_ssh_password_guessing.assert_not_called() @pytest.mark.parametrize("auth_success", ["true", "false"]) -def test_analyze_with_message(auth_success): +async def test_analyze_with_message(auth_success): ssh = ModuleFactory().create_ssh_analyzer_obj() - ssh.check_successful_ssh = MagicMock() + ssh.check_successful_ssh = get_mock_coro(True) ssh.check_ssh_password_guessing = MagicMock() flow = SSH( starttime="1726655400.0", @@ -356,9 +366,9 @@ def test_analyze_with_message(auth_success): "flow": asdict(flow), } - ssh.analyze({"channel": "new_ssh", "data": json.dumps(msg_data)}) + await ssh.analyze({"channel": "new_ssh", "data": json.dumps(msg_data)}) - ssh.check_successful_ssh.assert_called_once_with(profileid, twid, flow) + ssh.check_successful_ssh.assert_called_once_with(twid, flow) ssh.check_ssh_password_guessing.assert_called_once_with( profileid, twid, flow ) From 48284f75fc49ef78aa03a76b77c50bb5e1e0cf62 Mon 
Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:18:28 +0300 Subject: [PATCH 41/44] update input.py unit tests --- tests/test_inputProc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tests/test_inputProc.py b/tests/test_inputProc.py index 91c33109e..15182ff12 100644 --- a/tests/test_inputProc.py +++ b/tests/test_inputProc.py @@ -403,13 +403,11 @@ def test_shutdown_gracefully_all_components_active(): input_process.zeek_thread = MagicMock() input_process.zeek_thread.start() input_process.open_file_handlers = {"test_file.log": MagicMock()} - input_process.zeek_pid = os.getpid() + input_process.zeek_pid = 123 with patch("os.kill") as mock_kill: - assert input_process.shutdown_gracefully() is True - mock_kill.assert_called_once_with( - input_process.zeek_pid, signal.SIGKILL - ) + assert input_process.shutdown_gracefully() + mock_kill.assert_called_with(input_process.zeek_pid, signal.SIGKILL) assert input_process.open_file_handlers["test_file.log"].close.called From 5d49d40c5008b30988cfe44606193a6cf86c9060 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:19:35 +0300 Subject: [PATCH 42/44] common_test_utils.py: add a mock to use for async functions --- tests/common_test_utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/common_test_utils.py b/tests/common_test_utils.py index 50b87e0d6..08ba1df9b 100644 --- a/tests/common_test_utils.py +++ b/tests/common_test_utils.py @@ -8,6 +8,7 @@ Dict, Optional, ) +from unittest.mock import Mock IS_IN_A_DOCKER_CONTAINER = os.environ.get("IS_IN_A_DOCKER_CONTAINER", False) @@ -20,6 +21,19 @@ path.mkdir(parents=True, exist_ok=True) +def get_mock_coro(return_value): + """ + instead of doing async_func = Mock() which doesn't work + you should use this function to mock it + so async_func = get_mock_coro(x) + """ + + async def mock_coro(*args, **kwargs): + return return_value + + return Mock(wraps=mock_coro) + + def do_nothing(*args): """Used to override the print 
function because using the self.print causes broken pipes""" pass From b33fb2dbe674052c6cfadb9676c48c4eef756e94 Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:23:59 +0300 Subject: [PATCH 43/44] update dns.py unit tests --- tests/test_dns.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index 4e5464e74..386cb3bbb 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -3,6 +3,7 @@ from dataclasses import asdict from slips_files.core.flows.zeek import DNS +from tests.common_test_utils import get_mock_coro from tests.module_factory import ModuleFactory from numpy import arange from unittest.mock import patch, Mock @@ -439,17 +440,17 @@ def test_check_high_entropy_dns_answers_no_call( ), ], ) -def test_analyze_new_flow_msg(test_case, expected_calls): +async def test_analyze_new_flow_msg(test_case, expected_calls): dns = ModuleFactory().create_dns_analyzer_obj() dns.connections_checked_in_dns_conn_timer_thread = [] - dns.check_dns_without_connection = Mock() + dns.check_dns_without_connection = get_mock_coro(True) dns.check_high_entropy_dns_answers = Mock() dns.check_invalid_dns_answers = Mock() dns.detect_dga = Mock() dns.detect_young_domains = Mock() dns.check_dns_arpa_scan = Mock() - dns.analyze({"channel": "new_dns", "data": test_case["data"]}) + await dns.analyze({"channel": "new_dns", "data": test_case["data"]}) assert ( dns.check_dns_without_connection.call_count From 4eec4b3c3dd75a5866bd8b2a530dab146eb6bf0d Mon Sep 17 00:00:00 2001 From: alya Date: Tue, 1 Oct 2024 20:53:50 +0300 Subject: [PATCH 44/44] update ssh.py unit tests --- tests/test_ssh.py | 158 ++-------------------------------------------- 1 file changed, 7 insertions(+), 151 deletions(-) diff --git a/tests/test_ssh.py b/tests/test_ssh.py index 0197f3ca4..f80e91ea4 100644 --- a/tests/test_ssh.py +++ b/tests/test_ssh.py @@ -162,9 +162,13 @@ def test_detect_successful_ssh_by_slips(): host_key_alg="", host_key="", ) - result 
= ssh.detect_successful_ssh_by_slips("profileid", "twid", flow) - expected_result = True - assert result == expected_result + conn_log_flow = { + "saddr": flow.saddr, + "daddr": flow.daddr, + "sbytes": 2000, + "dbytes": 2000, + } + assert ssh.detect_successful_ssh_by_slips("twid", conn_log_flow, flow) ssh.set_evidence.ssh_successful.assert_called_once_with( "twid", "192.168.1.1", @@ -174,154 +178,6 @@ def test_detect_successful_ssh_by_slips(): flow.starttime, by="Slips", ) - assert "1234" not in ssh.connections_checked_in_ssh_timer_thread - - -def test_detect_successful_ssh_by_zeek(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - profileid = "profile_192.168.1.1" - twid = "timewindow1" - flow = SSH( - starttime="1726655400.0", - uid="1234", - daddr="192.168.1.2", - saddr="192.168.1.1", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - flow_data = { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - } - mock_flow = {"1234": json.dumps(flow_data)} - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - ssh.connections_checked_in_ssh_timer_thread = [] - assert ssh.set_evidence_ssh_successful_by_zeek(twid, flow) - ssh.set_evidence.ssh_successful.assert_called_once_with( - twid, - flow_data["saddr"], - flow_data["daddr"], - flow_data["sbytes"] + flow_data["dbytes"], - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread - ssh.db.search_tws_for_flow.assert_called_once_with( - profileid, twid, flow.uid - ) - - -def test_detect_successful_ssh_by_zeek_flow_exists_auth_success(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - - mock_flow = { - "test_uid": json.dumps( - { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - "auth_success": True, - } 
- ) - } - - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - flow = SSH( - starttime="1726655400.0", - uid="1234", - saddr="192.168.1.1", - daddr="192.168.1.2", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) - - expected_result = True - assert result == expected_result - ssh.set_evidence.ssh_successful.assert_called_once_with( - "twid", - "192.168.1.1", - "192.168.1.2", - 2000, - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread - - -def test_detect_successful_ssh_by_zeek_flow_exists_auth_fail(): - ssh = ModuleFactory().create_ssh_analyzer_obj() - - mock_flow = { - "test_uid": json.dumps( - { - "daddr": "192.168.1.2", - "saddr": "192.168.1.1", - "sbytes": 1000, - "dbytes": 1000, - "auth_success": False, - } - ) - } - - ssh.db.search_tws_for_flow = MagicMock(return_value=mock_flow) - ssh.set_evidence = MagicMock() - flow = SSH( - starttime="1726655400.0", - uid="1234", - saddr="192.168.1.1", - daddr="192.168.1.2", - version="", - auth_success="true", - auth_attempts="", - client="", - server="", - cipher_alg="", - mac_alg="", - compression_alg="", - kex_alg="", - host_key_alg="", - host_key="", - ) - result = ssh.set_evidence_ssh_successful_by_zeek("twid", flow) - - expected_result = True - assert result == expected_result - ssh.set_evidence.ssh_successful.assert_called_once_with( - "twid", - "192.168.1.1", - "192.168.1.2", - 2000, - flow.uid, - flow.starttime, - by="Zeek", - ) - assert flow.uid not in ssh.connections_checked_in_ssh_timer_thread async def test_analyze_no_message():