Merge branch 'main' into alternative-cpu-utilization-reporting
* main:
  Tcp dump (#919)
  Hash must be decoded to understand spaces [skip ci]
  Allowing Deeplinks to specific phases [skip ci] (#1016)
  Bump python from 3.13.0-slim-bookworm to 3.13.1-slim-bookworm in /docker (#1015)
  Bump pydantic from 2.10.2 to 2.10.3 (#1010)
  Bump fastapi[standard] from 0.115.5 to 0.115.6 (#1011)
  Bump aiohttp from 3.11.9 to 3.11.10 (#1013)
  Bump redis from 5.2.0 to 5.2.1 (#1014)
  Bump python from 3.12.7-slim-bookworm to 3.13.0-slim-bookworm in /docker (#949)
  Bump hiredis from 3.0.0 to 3.1.0 (#1012)
  Bump pylint from 3.3.1 to 3.3.2 (#1008)
  Bump pytest from 8.3.3 to 8.3.4 (#1007)
  Bump aiohttp from 3.11.7 to 3.11.9 (#1009)
  Added kill script for GMT
  Adding cachetools as requirement
  EE Update
ArneTR committed Dec 9, 2024
2 parents 7af955a + c085073 commit 7320a74
Showing 13 changed files with 265 additions and 20 deletions.
7 changes: 5 additions & 2 deletions config.yml.example
@@ -192,8 +192,11 @@ measurement:
# Hardware_Availability_Year: 2011
######### vhost_ratio is the virtualization degree of the machine. For Bare Metal this is 1. For 1 out of 4 VMs this would be 0.25 etc.
# VHost_Ratio: 1

#--- END
#
###### DEBUG
# network.connections.tcpdump.system.provider.NetworkConnectionsTcpdumpSystemProvider:
# split_ports: True
#--- END


sci:
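The dotted key in the DEBUG block above is resolved to a provider class at runtime. A minimal sketch of that resolution, assuming the path is relative to the `metric_providers` package; the `load_provider` helper is hypothetical, not GMT's actual loader:

```python
import importlib

def load_provider(dotted_path, **options):
    # 'network.connections.tcpdump.system.provider.NetworkConnectionsTcpdumpSystemProvider'
    # -> module 'metric_providers.network.connections.tcpdump.system.provider'
    # -> class  'NetworkConnectionsTcpdumpSystemProvider'
    module_path, class_name = dotted_path.rsplit('.', 1)
    module = importlib.import_module(f'metric_providers.{module_path}')
    return getattr(module, class_name)(**options)

# mirrors the DEBUG block above
provider = load_provider(
    'network.connections.tcpdump.system.provider.NetworkConnectionsTcpdumpSystemProvider',
    split_ports=True,
)
```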
2 changes: 1 addition & 1 deletion docker/Dockerfile-gunicorn
@@ -1,4 +1,4 @@
-FROM python:3.12.7-slim-bookworm
+FROM python:3.13.1-slim-bookworm
ENV DEBIAN_FRONTEND=noninteractive

WORKDIR /var/www/startup/
9 changes: 5 additions & 4 deletions docker/requirements.txt
@@ -1,7 +1,7 @@
gunicorn==23.0.0
psycopg[binary]==3.2.3
psycopg_pool==3.2.4
-fastapi[standard]==0.115.5
+fastapi[standard]==0.115.6
starlette>=0.35
uvicorn[standard]==0.32.1
pandas==2.2.3
@@ -11,9 +11,10 @@ orjson==3.10.12
scipy==1.14.1
schema==0.7.7
deepdiff==8.0.1
-redis==5.2.0
-hiredis==3.0.0
+redis==5.2.1
+hiredis==3.1.0
requests==2.32.3
uvicorn-worker==0.2.0
+cachetools==5.5.0

-pytest==8.3.3 # needed because we need to exit in db.py if tests run with wrong config
+pytest==8.3.4 # needed because we need to exit in db.py if tests run with wrong config
2 changes: 1 addition & 1 deletion ee
Submodule ee updated from 6e26b4 to b16348
14 changes: 14 additions & 0 deletions frontend/js/helpers/phase-stats.js
@@ -298,5 +298,19 @@ const displayComparisonMetrics = (phase_stats_object) => {
    // marks the first runtime step and is shown by default
    document.querySelector('a.step[data-tab="[RUNTIME]"]').dispatchEvent(new Event('click'));

    // now we override if given
    let phase_to_display = decodeURIComponent(window.location.hash).split('#')[1];
    if (phase_to_display != null) {
        const allowed_phases = ['BASELINE', 'INSTALLATION', 'BOOT', 'IDLE', 'RUNTIME', 'REMOVE'];
        phase_to_display = phase_to_display.split('__');
        if (allowed_phases.includes(phase_to_display[0])) {
            document.querySelector(`a.step[data-tab="[${phase_to_display[0]}]"]`).dispatchEvent(new Event('click'));
        }
        const sub_phase_regex = /^[\.\s0-9a-zA-Z_\(\)-]+$/; // letters, digits, spaces and the characters . _ ( ) - only
        if (phase_to_display[1] != null && sub_phase_regex.test(phase_to_display[1])) {
            document.querySelector(`a.runtime-step[data-tab="${phase_to_display[1]}"]`).dispatchEvent(new Event('click'));
        }
    }

    window.dispatchEvent(new Event('resize'));
}
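This block enables deep links of the form `#PHASE__<sub-phase>` straight to a phase tab. A minimal Python sketch that builds such a fragment and mirrors the allow-list and character checks above; the URL shape is an assumption and `build_phase_deeplink` is a hypothetical helper:

```python
import re
from urllib.parse import quote

ALLOWED_PHASES = ('BASELINE', 'INSTALLATION', 'BOOT', 'IDLE', 'RUNTIME', 'REMOVE')
SUB_PHASE_RE = re.compile(r'^[.\s0-9a-zA-Z_()-]+$')  # same character set phase-stats.js accepts

def build_phase_deeplink(stats_url, phase, sub_phase=None):
    """Build '<stats_url>#PHASE__SUB' that the frontend above will decode and dispatch."""
    if phase not in ALLOWED_PHASES:
        raise ValueError(f'unknown phase: {phase}')
    fragment = phase
    if sub_phase is not None:
        if not SUB_PHASE_RE.match(sub_phase):
            raise ValueError(f'sub-phase contains disallowed characters: {sub_phase!r}')
        # percent-encoded spaces survive because the frontend runs decodeURIComponent() on the hash
        fragment += '__' + quote(sub_phase)
    return f'{stats_url}#{fragment}'

print(build_phase_deeplink('https://metrics.green-coding.io/stats.html?id=<run-id>', 'RUNTIME', 'Stress Container'))
```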
3 changes: 3 additions & 0 deletions metric_providers/network/connections/tcpdump/system/README.md
@@ -0,0 +1,3 @@
# Information

See https://docs.green-coding.io/docs/measuring/metric-providers/network-connections-tcpdump-system/ for details.
179 changes: 179 additions & 0 deletions metric_providers/network/connections/tcpdump/system/provider.py
@@ -0,0 +1,179 @@
import os
import re
from collections import defaultdict
import ipaddress
import netifaces # needed by get_primary_interface() and get_ip_addresses() below

from metric_providers.base import BaseMetricProvider
from lib.db import DB

class NetworkConnectionsTcpdumpSystemProvider(BaseMetricProvider):
def __init__(self, *, split_ports=True, skip_check=False):
super().__init__(
metric_name='network_connections_tcpdump_system',
metrics={},
resolution=None,
unit=None,
current_dir=os.path.dirname(os.path.abspath(__file__)),
metric_provider_executable='tcpdump.sh',
skip_check=skip_check
)
self.split_ports = split_ports


def read_metrics(self, run_id, containers=None):
with open(self._filename, 'r', encoding='utf-8') as file:
lines = file.readlines()

stats = parse_tcpdump(lines, split_ports=self.split_ports)

if rows := len(stats):
DB().query("""
UPDATE runs
SET logs= COALESCE(logs, '') || %s -- append
WHERE id = %s
""", params=(generate_stats_string(stats), run_id))
return rows

return 0

def get_stderr(self):
stderr = super().get_stderr()

if not stderr:
return stderr

        # truncate the first two bogus lines with information similar to:
        # tcpdump: data link type EN10MB
        # tcpdump: listening on eno2, link-type EN10MB (Ethernet), snapshot length 262144 bytes
        line_token = stderr.find("\n")
        if line_token != -1 and 'tcpdump: data link type' in stderr[:line_token]:
            stderr = stderr[line_token+1:]
        line_token = stderr.find("\n") # recompute: the offset has shifted if a line was dropped
        if line_token != -1 and 'tcpdump: listening on' in stderr[:line_token]:
            stderr = stderr[line_token+1:]

return stderr

def get_primary_interface():
gateways = netifaces.gateways()
if 'default' in gateways and netifaces.AF_INET in gateways['default']:
return gateways['default'][netifaces.AF_INET][1]

raise RuntimeError('Could not get primary network interface')

def get_ip_addresses(interface):
addresses = []

try:
addrs = netifaces.ifaddresses(interface)

if netifaces.AF_INET in addrs:
addresses.append(addrs[netifaces.AF_INET][0]['addr'])

if netifaces.AF_INET6 in addrs:
            # Get the first IPv6 address that is neither link-local (fe80::) nor ULA (fd00::)
for addr in addrs[netifaces.AF_INET6]:
if not addr['addr'].startswith('fe80:') and not addr['addr'].startswith('fd00:'):
addresses.append(addr['addr'])
break
except RuntimeError as e:
print(f"Error getting IP addresses: {e}")

if not addresses:
raise RuntimeError('Could not determine either IPv4 or IPv6 address')

return addresses

def parse_tcpdump(lines, split_ports=False):
stats = defaultdict(lambda: {'ports': defaultdict(lambda: {'packets': 0, 'bytes': 0}), 'total_bytes': 0})
ip_pattern = r'(\S+) > (\S+):'
#tcp_pattern = r'Flags \[(.+?)\]'

for line in lines:
ip_match = re.search(ip_pattern, line)
#tcp_match = re.search(tcp_pattern, line)

if ip_match:
src, dst = ip_match.groups()
src_ip, src_port = parse_ip_port(src)
dst_ip, dst_port = parse_ip_port(dst)

if src_ip and dst_ip:
protocol = "UDP" if "UDP" in line else "TCP"

if protocol == "UDP":
# For UDP, use the reported length
length_pattern = r'length:? (\d+)'
length_match = re.search(length_pattern, line)
if not length_match or not length_match.group(1):
raise RuntimeError(f"Could not find UDP packet length for line: {line}")
packet_length = int(length_match.group(1))

else:
# For TCP, estimate packet length (this is a simplification)
length_pattern = r'length (\d+)'
length_match = re.search(length_pattern, line)

if not length_match or not length_match.group(1):
if '.53 ' in line or '.53:' in line or '.5353 ' in line or '.5353:' in line: # try DNS / MDNS match
dns_packet_length = re.match(r'.*\((\d+)\)$', line)
if not dns_packet_length:
raise RuntimeError(f"Could not find TCP packet length for line: {line}")
packet_length = int(dns_packet_length[1])
else:
raise RuntimeError(f"No packet length was detected for line {line}")
else:
packet_length = 40 + int(length_match.group(1)) # Assuming 40 bytes for IP + TCP headers

# Update source IP stats
if split_ports:
stats[src_ip]['ports'][f"{src_port}/{protocol}"]['packets'] += 1
stats[src_ip]['ports'][f"{src_port}/{protocol}"]['bytes'] += packet_length
else:
stats[src_ip]['ports'][f"{protocol}"]['packets'] += 1 # alternative without splitting by port
stats[src_ip]['ports'][f"{protocol}"]['bytes'] += packet_length # alternative without splitting by port

stats[src_ip]['total_bytes'] += packet_length

# Update destination IP stats
if split_ports:
stats[dst_ip]['ports'][f"{dst_port}/{protocol}"]['packets'] += 1
stats[dst_ip]['ports'][f"{dst_port}/{protocol}"]['bytes'] += packet_length
else:
stats[dst_ip]['ports'][f"{protocol}"]['packets'] += 1 # alternative without splitting by port
stats[dst_ip]['ports'][f"{protocol}"]['bytes'] += packet_length # alternative without splitting by port

stats[dst_ip]['total_bytes'] += packet_length

return stats

def parse_ip_port(address):
try:
if ']' in address: # IPv6
ip, port = address.rsplit('.', 1)
ip = ip.strip('[]')
else: # IPv4
ip, port = address.rsplit('.', 1)

# Validate IP address
ipaddress.ip_address(ip)
return ip, int(port)
except ValueError:
return None, None

def generate_stats_string(stats, filter_host=False):
primary_interface = get_primary_interface()
ip_addresses = get_ip_addresses(primary_interface)

buffer = []
for ip, data in stats.items():
if filter_host and ip in ip_addresses:
continue

buffer.append(f"IP: {ip} (as sender or receiver. aggregated)")
buffer.append(f" Total transmitted data: {data['total_bytes']} bytes")
buffer.append(' Ports:')
for port, port_data in data['ports'].items():
buffer.append(f" {port}: {port_data['packets']} packets, {port_data['bytes']} bytes")
buffer.append('\n')

return '\n'.join(buffer)
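To get a feel for the parser, a minimal sketch that feeds `parse_tcpdump` a few hand-written lines imitating the per-packet detail lines of `tcpdump -n -v`, assuming the GMT repository root is on `sys.path` (the module also imports `lib.db`):

```python
from metric_providers.network.connections.tcpdump.system.provider import parse_tcpdump

sample_lines = [
    "    192.168.1.10.51000 > 192.168.1.20.443: Flags [S], seq 1, win 64240, length 0",
    "    192.168.1.20.443 > 192.168.1.10.51000: Flags [S.], seq 1, ack 2, win 65160, length 0",
    "    192.168.1.10.40000 > 192.168.1.20.9999: UDP, length 100",
]

stats = parse_tcpdump(sample_lines, split_ports=True)
for ip, data in stats.items():
    print(ip, data['total_bytes'], dict(data['ports']))
# Each TCP handshake packet is booked as 40 bytes (the provider's assumed IP+TCP
# header size plus the reported payload length of 0); the UDP packet as its
# reported 100 bytes. Every packet is attributed to both endpoint IPs.
```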
25 changes: 25 additions & 0 deletions metric_providers/network/connections/tcpdump/system/tcpdump.sh
@@ -0,0 +1,25 @@
#! /bin/bash
set -euo pipefail

check_system=false
while getopts "c" o; do
case "$o" in
c)
check_system=true
;;
esac
done


if $check_system; then
    # Try to capture a single packet. Since there might be no network traffic at all,
    # we also cap the wait at 3 seconds; timeout then exits with code 124, which is fine.
    set +e # temporarily allow non-zero exit codes so we can inspect them ourselves below
    first_line=$(timeout 3 tcpdump -tt --micro -n -v -c 1)
    exit_code=$?
    set -e
    if [ $exit_code -eq 1 ]; then
        echo "tcpdump could not be started. Missing sudo permissions?"
        exit 1
    fi
    exit 0
fi

tcpdump -tt --micro -n -v
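A caller can use the script's `-c` flag as a pre-flight check. A hedged sketch of such an invocation, assuming the script is run via sudo from the repository root; treating exit code 124 (timeout with no traffic) as success mirrors the script's own comment:

```python
import subprocess

# path of the script inside the repository; adjust to your checkout
script = 'metric_providers/network/connections/tcpdump/system/tcpdump.sh'

result = subprocess.run(['sudo', script, '-c'], capture_output=True, text=True, check=False)
if result.returncode not in (0, 124):  # 124 = timeout hit with no traffic, which is acceptable
    raise RuntimeError(result.stdout.strip() or 'tcpdump check failed')
print('tcpdump check passed')
```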
4 changes: 2 additions & 2 deletions requirements-dev.txt
@@ -1,5 +1,5 @@
-r requirements.txt
-pydantic==2.10.2
-pylint==3.3.1
+pydantic==2.10.3
+pylint==3.3.2
pytest-randomly==3.16.0
pytest-playwright==0.6.2
2 changes: 1 addition & 1 deletion requirements.txt
@@ -5,7 +5,7 @@ psycopg_pool==3.2.4
pyserial==3.5
psutil==6.1.0
schema==0.7.7
-aiohttp==3.11.7
+aiohttp==3.11.10

# calibration script dep
tqdm==4.67.1
24 changes: 16 additions & 8 deletions runner.py
@@ -1041,12 +1041,19 @@ def start_metric_providers(self, allow_container=True, allow_other=True):

        print(TerminalColors.HEADER, '\nStarting metric providers', TerminalColors.ENDC)

+        # Here we start all container related providers.
+        # This includes tcpdump, which is only for debugging of the containers themselves.
+        # If debugging of the tool itself is wanted, tcpdump should be started adjacent to the tool and not inline
        for metric_provider in self.__metric_providers:
-            if metric_provider._metric_name.endswith('_container') and not allow_container:
+            if (metric_provider._metric_name.endswith('_container') or metric_provider._metric_name == 'network_connections_tcpdump_system') and not allow_container:
                continue
-            if not metric_provider._metric_name.endswith('_container') and not allow_other:
+
+            if not metric_provider._metric_name.endswith('_container') and metric_provider._metric_name != 'network_connections_tcpdump_system' and not allow_other:
                continue

+            if metric_provider.has_started():
+                raise RuntimeError(f"Metric provider {metric_provider.__class__.__name__} was already started!")
+
            message = f"Booting {metric_provider.__class__.__name__}"
            metric_provider.start_profiling(self.__containers)
            if self._verbose_provider_boot:
@@ -1058,9 +1065,10 @@ def start_metric_providers(self, allow_container=True, allow_other=True):
        self.custom_sleep(2)

        for metric_provider in self.__metric_providers:
-            if metric_provider._metric_name.endswith('_container') and not allow_container:
+            if (metric_provider._metric_name.endswith('_container') or metric_provider._metric_name == 'network_connections_tcpdump_system') and not allow_container:
                continue
-            if not metric_provider._metric_name.endswith('_container') and not allow_other:
+
+            if not metric_provider._metric_name.endswith('_container') and metric_provider._metric_name != 'network_connections_tcpdump_system' and not allow_other:
                continue

            stderr_read = metric_provider.get_stderr()
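The same compound gating condition now appears in both loops above. A hedged refactoring sketch that factors it into a predicate; the helper names are hypothetical, and the committed code inlines the checks:

```python
def _is_container_scoped(metric_provider):
    # tcpdump observes the containers under test, so it is gated together with
    # the *_container providers even though its name ends in _system
    return (metric_provider._metric_name.endswith('_container')
            or metric_provider._metric_name == 'network_connections_tcpdump_system')

def _skip_provider(metric_provider, allow_container, allow_other):
    if _is_container_scoped(metric_provider) and not allow_container:
        return True
    if not _is_container_scoped(metric_provider) and not allow_other:
        return True
    return False
```

Both loops would then reduce to `if _skip_provider(metric_provider, allow_container, allow_other): continue`.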
@@ -1444,7 +1452,7 @@ def save_stdout_logs(self):
        if logs_as_str:
            DB().query("""
                UPDATE runs
-                SET logs=%s
+                SET logs = COALESCE(logs, '') || %s -- append
                WHERE id = %s
                """, params=(logs_as_str, self._run_id))

@@ -1617,21 +1625,21 @@ def run(self):
                raise exc
            finally:
                try:
-                    self.read_and_cleanup_processes()
+                    self.stop_metric_providers()
                except BaseException as exc:
                    self.add_to_log(exc.__class__.__name__, str(exc))
                    self.set_run_failed()
                    raise exc
                finally:
                    try:
-                        self.save_notes_runner()
+                        self.read_and_cleanup_processes()
                    except BaseException as exc:
                        self.add_to_log(exc.__class__.__name__, str(exc))
                        self.set_run_failed()
                        raise exc
                    finally:
                        try:
-                            self.stop_metric_providers()
+                            self.save_notes_runner()
                        except BaseException as exc:
                            self.add_to_log(exc.__class__.__name__, str(exc))
                            self.set_run_failed()
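The reorder means providers are stopped first, so they flush their data (including tcpdump's capture) before helper processes are reaped and notes are saved. A condensed, runnable sketch of the nested try/finally pattern `run()` uses, with stub functions standing in for the runner methods; the committed code additionally catches BaseException at each level to log and mark the run failed before re-raising:

```python
# stub runner steps; in runner.py these are methods on the Runner instance
def stop_metric_providers(): print('stop providers (flushes tcpdump output)')
def read_and_cleanup_processes(): print('reap helper processes')
def save_notes_runner(): print('persist notes')

try:
    pass  # ... measurement phases would run here ...
finally:
    try:
        stop_metric_providers()           # now first: providers flush before ...
    finally:
        try:
            read_and_cleanup_processes()  # ... their helper processes are reaped ...
        finally:
            save_notes_runner()           # ... and notes are persisted, even on failure
```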
2 changes: 1 addition & 1 deletion tests/test_functions.py
@@ -132,9 +132,9 @@ def run_until(self, step):
        self.__runner.update_start_and_end_times()
        self.__runner.store_phases()
        self.__runner.read_container_logs()
+        self.__runner.stop_metric_providers()
        self.__runner.read_and_cleanup_processes()
        self.__runner.save_notes_runner()
-        self.__runner.stop_metric_providers()
        self.__runner.save_stdout_logs()

        if self.__runner._dev_no_phase_stats is False:
12 changes: 12 additions & 0 deletions tools/kill_gmt.sh
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

read -p "This will kill all processes know to be forked by GMT. It may also kill other similar named processes and should only be used on dedicated measurement nodes. In case you are looged in remotely it will also kill the current terminal session, so you must log in again. Do you want to continue? (y/N) : " kill_gmt

if [[ "$kill_gmt" == "Y" || "$kill_gmt" == "y" ]] ; then
pgrep python3 | xargs kill
pgrep tinyproxy | xargs kill
pgrep metric_providers | xargs kill
pgrep tcpdump | xargs kill
docker rm -f $(docker ps -aq) 2>/dev/null
pgrep bash | xargs kill -9
fi
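For reference, a hedged Python equivalent of the same cleanup using psutil (already pinned in requirements.txt); like the pgrep calls it mirrors, the name matching is deliberately blunt and will also kill the interpreter running it:

```python
import psutil

TARGETS = ('python3', 'tinyproxy', 'tcpdump')

for proc in psutil.process_iter(['name', 'cmdline']):
    name = proc.info['name'] or ''
    cmdline = ' '.join(proc.info['cmdline'] or [])
    if name in TARGETS or 'metric_providers' in cmdline:
        try:
            proc.kill()
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            pass  # process already gone or not ours to kill
```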
