From f20aaa3c99d4e332a4c87439105f882e7b208e96 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 21 Oct 2024 13:57:58 +0200 Subject: [PATCH 01/12] Update Docker file (#35) Update Dockerfile & optimise docker build --- etc/docker/Dockerfile | 75 ++++++++++++++++------------------------ etc/docker/entrypoint.sh | 2 ++ 2 files changed, 32 insertions(+), 45 deletions(-) diff --git a/etc/docker/Dockerfile b/etc/docker/Dockerfile index a470699..8688c1c 100644 --- a/etc/docker/Dockerfile +++ b/etc/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.11-slim +FROM python:3.12-slim LABEL org.opencontainers.image.title="AIP" \ org.opencontainers.image.description="This image runs the AIP framework for blocklist generation." \ org.opencontainers.image.version="0.1.0" \ @@ -7,67 +7,52 @@ LABEL org.opencontainers.image.title="AIP" \ org.opencontainers.image.source="Joaquin Bogado " \ org.opencontainers.image.authors="Veronica Valeros " -ARG DEBIAN_FRONTEND=noninteractive -ENV TZ=Etc/UTC -SHELL [ "/bin/bash", "--login", "-c" ] - -# Create a non-root user +# Define arguments for username, UID, and GID ARG username=aip ARG uid=1000 -ARG gid=100 -ENV USER $username -ENV UID $uid -ENV GID $gid -ENV HOME /home/$USER - -RUN adduser --disabled-password \ - --gecos "Non-root user" \ - --uid $UID \ - --gid $GID \ - --home $HOME \ - $USER - -RUN apt-get update && \ - apt-get install -y bzip2 wget && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* +ARG gid=1000 + +# Set environment variables based on these arguments +ENV USER=$username +ENV UID=$uid +ENV GID=$gid +ENV HOME=/home/$USER + +# Create a group and user based on the UID and GID +RUN groupadd -g $GID $USER && \ + useradd -m -u $UID -g $GID -s /bin/bash $USER COPY etc/docker/entrypoint.sh /usr/local/bin/ RUN chmod u+x /usr/local/bin/entrypoint.sh +# Switch to the non-root user USER $USER +ENV PATH="$HOME/miniconda3/bin:$PATH" +ENV ENV_PREFIX=$HOME/env -ENV MINICONDA_VERSION latest -ENV CONDA_DIR $HOME/miniconda3 -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-$MINICONDA_VERSION-Linux-x86_64.sh -O ~/miniconda.sh && \ - chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p $CONDA_DIR && \ - rm ~/miniconda.sh -# Make non-activate conda commands available -ENV PATH=$CONDA_DIR/bin:$PATH - -# Make conda activate command available from /bin/bash --login shells -RUN echo ". $CONDA_DIR/etc/profile.d/conda.sh" >> ~/.profile +# Conda installation and setup +RUN python -c "import urllib.request; urllib.request.urlretrieve('https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh', '$HOME/miniconda.sh')" && \ + bash ~/miniconda.sh -b -p $HOME/miniconda3 && \ + rm ~/miniconda.sh -# Make conda activate command available from /bin/bash --interative shells RUN conda init bash -ENV PROJECT_DIR $HOME/AIP -RUN mkdir $PROJECT_DIR -WORKDIR $PROJECT_DIR +# Set the working directory +WORKDIR $HOME/AIP -COPY environment.yml requirements.txt $PROJECT_DIR/ +COPY environment.yml requirements.txt $HOME/AIP/ -ENV ENV_PREFIX $HOME/env RUN conda update --name base conda -RUN conda env create --file $PROJECT_DIR/environment.yml --force -RUN conda clean --all --yes +RUN conda env create --file environment.yml && \ + conda clean --all --yes + +# Copy application +COPY . . -# Include AIP as python package -COPY . $PROJECT_DIR -RUN ln -s /home/aip/AIP/lib/aip /home/aip/miniconda3/envs/aip/lib/python3.11/site-packages/ +# Dynamically link aip to the correct site-packages folder +RUN ln -s $HOME/AIP/lib/aip $(conda run -n aip python -c "import site; print(site.getsitepackages()[0])")/aip RUN echo 'conda activate aip' >> $HOME/.bashrc diff --git a/etc/docker/entrypoint.sh b/etc/docker/entrypoint.sh index b747494..79fd5aa 100644 --- a/etc/docker/entrypoint.sh +++ b/etc/docker/entrypoint.sh @@ -1,6 +1,8 @@ #!/bin/bash --login set -e +source $HOME/miniconda3/etc/profile.d/conda.sh + conda activate aip export force_color_prompt=yes exec "$@" From b69b09f8027c484a0e0597a273f65de514aa7954 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 10:31:38 +0200 Subject: [PATCH 02/12] Better handle missing files (#36) * Update base image * Update env format * Update user creation and remove install of packages * Update conda installation * Remove layers not needed * Optimize layers * Avoid hardcoded paths * Use good practice to copy multiple files * Add requirement for conda to work * Explain entrypoint.sh functionality * Add docstring to validate_and_convert_date function * Remove unused import * Check file exists bf initialising DF * Check file exists or return empty array * Raise error if file does not exist. It's required. --- bin/aip | 8 ++++---- etc/docker/entrypoint.sh | 10 +++++++++- lib/aip/data/access.py | 8 ++++++++ lib/aip/models/base.py | 13 ++++++++++++- 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/bin/aip b/bin/aip index 2325887..0977d6c 100755 --- a/bin/aip +++ b/bin/aip @@ -37,12 +37,15 @@ from aip.models.alpha import Alpha from aip.models.prioritize import New from aip.models.prioritize import Consistent from aip.models.prioritize import RandomForest -from pathlib import Path from os import makedirs, path, scandir from datetime import date, timedelta, datetime import sys + def validate_and_convert_date(date_str): + """ + Validates a date string in 'YYYY-MM-DD' format and converts it to a date object. + """ try: dateobj = datetime.strptime(date_str, '%Y-%m-%d') return dateobj.date() @@ -50,7 +53,6 @@ def validate_and_convert_date(date_str): print('Invalid date format. It should be YYYY-MM-DD') raise e -#project_dir = Path(__file__).resolve().parents[1] if __name__ == '__main__': if len(sys.argv) == 2: @@ -59,12 +61,10 @@ if __name__ == '__main__': else: day = date.today() - log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' #logging.basicConfig(level=logging.INFO, format=log_fmt) logging.basicConfig(level=logging.DEBUG, format=log_fmt) - #Alpha Model output_dir = path.join(project_dir, 'data', 'output', 'Alpha') if not path.exists(output_dir): diff --git a/etc/docker/entrypoint.sh b/etc/docker/entrypoint.sh index 79fd5aa..3e4a4b5 100644 --- a/etc/docker/entrypoint.sh +++ b/etc/docker/entrypoint.sh @@ -1,8 +1,16 @@ #!/bin/bash --login + +# Exit immediately if any command exits with a non-zero status set -e +# Force the command prompt to display colors +export force_color_prompt=yes + +# Initialize Conda in the current shell session source $HOME/miniconda3/etc/profile.d/conda.sh +# Activate the conda environment already created in the docker conda activate aip -export force_color_prompt=yes + +# Execute any command passed to the container when run exec "$@" diff --git a/lib/aip/data/access.py b/lib/aip/data/access.py index 671521c..67cc325 100644 --- a/lib/aip/data/access.py +++ b/lib/aip/data/access.py @@ -50,6 +50,14 @@ def _get_honeypot_ips(for_date=None): ''' Filter those honeypots active due date for_date, if there are operation dates in the honeypot file. ''' + logger = logging.getLogger(__name__) + # Check if the file exists before attempting to read it + honeypot_public_ips = path.join(project_dir, 'data', 'external', 'honeypots_public_ips.csv') + + if not path.exists(honeypot_public_ips): + logger.error(f"File 'honeypot_public_ips.csv' does not exist. Raising error.") + raise FileNotFoundError("Required file 'honeypots_public_ips.csv' does not exist.") + honeypots = pd.read_csv(path.join(project_dir, 'data', 'external', 'honeypots_public_ips.csv'), comment='#') if for_date is not None: for_date = pd.to_datetime(for_date) diff --git a/lib/aip/models/base.py b/lib/aip/models/base.py index f423182..4539cac 100644 --- a/lib/aip/models/base.py +++ b/lib/aip/models/base.py @@ -25,6 +25,7 @@ __version__ = "0.0.1" import pandas as pd +import logging from aip.data.access import data_dir from aip.utils.autoload import register, models @@ -36,9 +37,19 @@ class BaseModel(): Template class for AIP models ''' def __init__(self): + # Set up the logger for the class + self.logger = logging.getLogger(self.__class__.__name__) + # Model initialization and configuration self.blocklist = pd.DataFrame() - self.donotblocklist = pd.read_csv(path.join(data_dir, 'external', 'do_not_block_these_ips.csv')) + exclude_ips = path.join(data_dir, 'external', 'do_not_block_these_ips.csv') + + if path.exists(exclude_ips): + self.donotblocklist = pd.read_csv(exclude_ips) + else: + # Warning: File 'do_not_block_these_ips.csv' does not exist. Initializing with empty DataFrame. + self.logger.warning("File 'do_not_block_these_ips.csv' does not exist. Initializing with empty DataFrame.") + self.donotblocklist = pd.DataFrame(columns=['ip']) def sanitize(self, blocklist=None): if blocklist is None: From 356679d6e3af23f7a5f8988d4bb9f8ecc08795a6 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 14:35:58 +0200 Subject: [PATCH 03/12] Better management of exceptions when running models (#37) * Create run_model to group logic * Use run_model for all models --- bin/aip | 68 ++++++++++++++++++++++----------------------------------- 1 file changed, 26 insertions(+), 42 deletions(-) diff --git a/bin/aip b/bin/aip index 0977d6c..3b712b5 100755 --- a/bin/aip +++ b/bin/aip @@ -53,6 +53,22 @@ def validate_and_convert_date(date_str): print('Invalid date format. It should be YYYY-MM-DD') raise e +def run_model(aip_model_name, aip_output_dir, aip_model, date_day): + """ + Run a given model with exception handling + """ + blocklist="" + # Make sure output directory is created + if not path.exists(aip_output_dir): + makedirs(aip_output_dir) + + try: + blocklist = aip_model.run(day) + blocklist.to_csv(path.join(aip_output_dir, f'AIP-{aip_model_name}-{str(date_day)}.csv.gz'), index=False, compression='gzip') + logging.info(f"{aip_model_name} model completed successfully.") + except Exception as e: + logging.error(f"Error running {aip_model_name} model: {e}", exc_info=True) + if __name__ == '__main__': if len(sys.argv) == 2: @@ -61,53 +77,21 @@ if __name__ == '__main__': else: day = date.today() + # Set up logging log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - #logging.basicConfig(level=logging.INFO, format=log_fmt) logging.basicConfig(level=logging.DEBUG, format=log_fmt) - #Alpha Model - output_dir = path.join(project_dir, 'data', 'output', 'Alpha') - if not path.exists(output_dir): - makedirs(output_dir) - alpha = Alpha() - blocklist = alpha.run(day) - blocklist = blocklist.rename(columns={'ip':'attacker'}) - pd.DataFrame(blocklist, columns=['attacker']).to_csv(path.join(output_dir, - f'AIP-Alpha-{str(day)}.csv.gz'), index=False, compression='gzip') - - #Alpha 7 Model (seven days in the past) - output_dir = path.join(project_dir, 'data', 'output', 'Alpha7') - if not path.exists(output_dir): - makedirs(output_dir) - alpha7 = Alpha(lookback=7) - blocklist = alpha7.run(day) - blocklist = blocklist.rename(columns={'ip':'attacker'}) - pd.DataFrame(blocklist, columns=['attacker']).to_csv(path.join(output_dir, - f'AIP-Alpha7-{str(day)}.csv.gz'), index=False, compression='gzip') + # Run Alpha Model + run_model('Alpha', path.join(data_path, 'output', 'Alpha'), Alpha(), day) + + # Alpha 7 Model + run_model('Alpha7', path.join(data_path, 'output', 'Alpha7'), Alpha(lookback=7), day) # Prioritize New Model - output_dir = path.join(data_path, 'output', 'Prioritize_New') - if not path.exists(output_dir): - makedirs(output_dir) - pn = New() - blocklist = pn.run(day) - blocklist.to_csv(path.join(output_dir, - f'AIP-Prioritize_New-{str(day)}.csv.gz'), index=False, compression='gzip') + run_model('Prioritize_New', path.join(data_path, 'output', 'Prioritize_New'), New(), day) # Prioritize Consistent Model - output_dir = path.join(data_path, 'output', 'Prioritize_Consistent') - if not path.exists(output_dir): - makedirs(output_dir) - pc = Consistent() - blocklist = pc.run(day) - blocklist.to_csv(path.join(output_dir, - f'AIP-Prioritize_Consistent-{str(day)}.csv.gz'), index=False, compression='gzip') - + run_model('Prioritize_Consistent', path.join(data_path, 'output', 'Prioritize_Consistent'), Consistent(), day) + # Prioritize Random Forest Model - output_dir = path.join(data_path, 'output', 'random_forest') - if not path.exists(output_dir): - makedirs(output_dir) - rf = RandomForest() - blocklist = rf.run(day) - blocklist.to_csv(path.join(output_dir, - f'AIP-Random_Forest-{str(day)}.csv.gz'), index=False, compression='gzip') + run_model('Random_Forest', path.join(data_path, 'output', 'random_forest'), RandomForest(), day) From 8eb53cd9da9e0ad78cf2957ef4dc9bf0c2290b2e Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 15:12:46 +0200 Subject: [PATCH 04/12] Update docker documentation (#38) * Update docker instructions * Reference docker docs in README --- README.md | 5 +++++ etc/docker/README.md | 46 +++++++++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 3e2fc4f..e1e79a0 100644 --- a/README.md +++ b/README.md @@ -9,4 +9,9 @@ Given a honeypot network in your organization, it should be easy to use AIP to g ![Description of the AIP pipeline](images/AIP_Diagram.png "AIP Tool pipeline") +## Docker +Check the instructions on how to run the AIP using [Docker](etc/docker/README.md). + +# About +This tool was developed at the Stratosphere Laboratory at the Czech Technical University in Prague. \ No newline at end of file diff --git a/etc/docker/README.md b/etc/docker/README.md index 3a5749e..88818af 100644 --- a/etc/docker/README.md +++ b/etc/docker/README.md @@ -1,21 +1,45 @@ # Docker image for AIP + AIP docker aims to help in development and deployment of AIP algorithms to newcommers. The code of the repository is mounted and available from inside the docker image. The source of the data and the output folder of AIP is also mounted as a data volume inside the data/ folder to decouple from where this data really is in the host machine, easying the AIP deployment. -### Build the image +## Build the image + To build the image, you can run the following command. -``` -docker build --file etc/docker/Dockerfile --tag aip:aip . -``` -### Run the container -To run the container of that image you can run the following command. This will mount the root folder of the project in $HOME/AIP inside the docker container. -``` -docker run --rm -v $(pwd):/home/aip/AIP/:rw -it aip:aip bash +```bash +:~$ git clone https://github.com/stratosphereips/AIP.git +:~$ cd AIP/ +:~/AIP$ docker build --build-arg uid=1000 --file etc/docker/Dockerfile --tag aip:latest . ``` -### Run the tests -Once inside the container you need to activate the conda aip environment in order to be able to run the tests. +## Prepare the data + +AIP needs raw network flow data to run. In this case, we assume you have Zeek logs in `/opt/zeek/logs`. + +Additionally, the following two files need to be edited and populated: +- `data/external/do_not_block_these_ips_example.csv`: you want to add here IPs that should not appear on the AIP blocklists +- `data/external/honeypots_public_ips_example.csv`: you want to add here the public IP of the honeypot or machine running Zeek + +First copy the files and then edit them: +```bash +:~/AIP$ cp data/external/do_not_block_these_ips_example.csv data/external/do_not_block_these_ips.csv +:~/AIP$ cp data/external/honeypots_public_ips_example.csv data/external/honeypots_public_ips.csv ``` -(aip) aip@a2aa875c07cf:~/AIP$ pytest tests/ + +## Run the container + +To run the container of that image you can run the following command: + +```bash +:~/AIP$ docker run --rm -v /opt/zeek/logs/:/home/aip/AIP/data/raw:ro -v ${PWD}/data/:/home/aip/AIP/data/:rw --name aip aip:latest bin/aip ``` +An example output is shown below: +``` +2024-10-23 13:02:36,513 - aip.data.access - DEBUG - Creating attacks for dates ['2024-10-22'] +2024-10-23 13:02:36,513 - aip.data.access - DEBUG - Making dataset from raw data for dates ['2024-10-22'] +2024-10-23 13:02:37,197 - aip.data.access - DEBUG - Writting file: /home/aip/AIP/data/interim/daily.conn.2024-10-22.csv.gz +2024-10-23 13:02:37,539 - aip.data.access - DEBUG - Writting file: /home/aip/AIP/data/processed/attacks.2024-10-22.csv.gz +2024-10-23 13:02:37,648 - aip.data.access - DEBUG - Creating attacks for dates ['2024-10-20', '2024-10-21'] +... +``` \ No newline at end of file From a384d309fb5ba75e596e3855606ecb58a9a7e6c3 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 16:05:57 +0200 Subject: [PATCH 05/12] Update docker image for MacM1 (#39) --- etc/docker/Dockerfile_MacM1 | 89 ++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 51 deletions(-) diff --git a/etc/docker/Dockerfile_MacM1 b/etc/docker/Dockerfile_MacM1 index 61a470b..8735353 100644 --- a/etc/docker/Dockerfile_MacM1 +++ b/etc/docker/Dockerfile_MacM1 @@ -1,72 +1,59 @@ -FROM ubuntu:focal -ARG DEBIAN_FRONTEND=noninteractive -ENV TZ=Etc/UTC +FROM python:3.12-slim +LABEL org.opencontainers.image.title="AIP" \ + org.opencontainers.image.description="This image runs the AIP framework for blocklist generation." \ + org.opencontainers.image.version="0.1.0" \ + org.opencontainers.image.created="2023-08-01" \ + org.opencontainers.image.source="https://github.com/stratosphereips/AIP" \ + org.opencontainers.image.source="Joaquin Bogado " \ + org.opencontainers.image.authors="Veronica Valeros " -LABEL maintainer="Joaquin Bogado " -SHELL [ "/bin/bash", "--login", "-c" ] - -RUN apt-get update --fix-missing && \ - apt-get install -y wget bzip2 curl git vim argus-client&& \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* - -# Create a non-root user +# Define arguments for username, UID, and GID ARG username=aip ARG uid=1000 -ARG gid=100 -ENV USER $username -ENV UID $uid -ENV GID $gid -ENV HOME /home/$USER - -RUN adduser --disabled-password \ - --gecos "Non-root user" \ - --uid $UID \ - --gid $GID \ - --home $HOME \ - $USER - -COPY environment.yml requirements.txt /tmp/ -RUN chown $UID:$GID /tmp/environment.yml /tmp/requirements.txt +ARG gid=1000 + +# Set environment variables based on these arguments +ENV USER=$username +ENV UID=$uid +ENV GID=$gid +ENV HOME=/home/$USER + +# Create a group and user based on the UID and GID +RUN groupadd -g $GID $USER && \ + useradd -m -u $UID -g $GID -s /bin/bash $USER COPY etc/docker/entrypoint.sh /usr/local/bin/ -RUN chown $UID:$GID /usr/local/bin/entrypoint.sh && \ - chmod u+x /usr/local/bin/entrypoint.sh +RUN chmod u+x /usr/local/bin/entrypoint.sh +# Switch to the non-root user USER $USER +ENV PATH="$HOME/miniconda3/bin:$PATH" +ENV ENV_PREFIX=$HOME/env -ENV MINICONDA_VERSION latest -ENV CONDA_DIR $HOME/miniconda3 -RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-arm64.sh -O ~/miniconda.sh && \ - chmod +x ~/miniconda.sh && \ - ~/miniconda.sh -b -p $CONDA_DIR && \ - rm ~/miniconda.sh -# make non-activate conda commands available -ENV PATH=$CONDA_DIR/bin:$PATH - -# make conda activate command available from /bin/bash --login shells -RUN echo ". $CONDA_DIR/etc/profile.d/conda.sh" >> ~/.profile +# Conda installation and setup +RUN python -c "import urllib.request; urllib.request.urlretrieve('https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh', '$HOME/miniconda.sh')" && \ + bash ~/miniconda.sh -b -p $HOME/miniconda3 && \ + rm ~/miniconda.sh -# make conda activate command available from /bin/bash --interative shells RUN conda init bash -ENV PROJECT_DIR $HOME/AIP -RUN mkdir $PROJECT_DIR -WORKDIR $PROJECT_DIR +# Set the working directory +WORKDIR $HOME/AIP +COPY environment.yml requirements.txt $HOME/AIP/ -ENV ENV_PREFIX $HOME/env RUN conda update --name base conda -RUN conda env create --file /tmp/environment.yml --force -RUN conda clean --all --yes +RUN conda env create --file environment.yml && \ + conda clean --all --yes -# Include AIP as python package -# RUN mkdir -p /home/aip/AIP/lib/python3.10/site-packages/ -RUN ln -s /home/aip/AIP/lib/aip /home/aip/miniconda3/envs/aip/lib/python3.10/site-packages/ +# Copy application +COPY . . + +# Dynamically link aip to the correct site-packages folder +RUN ln -s $HOME/AIP/lib/aip $(conda run -n aip python -c "import site; print(site.getsitepackages()[0])")/aip RUN echo 'conda activate aip' >> $HOME/.bashrc ENTRYPOINT [ "/usr/local/bin/entrypoint.sh" ] - From bdadca7340370f92e292b1398900642c9ae6e109 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 16:19:44 +0200 Subject: [PATCH 06/12] Add CITATION.cff --- CITATION.cff | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..fb63e4c --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,25 @@ +cff-version: 1.2.0 +title: "Stratosphere AIP: Attacker IP Prioritizer" +message: 'If you use this software, please cite it as specified below.' +url: "https://github.com/stratosphereips/AIP" +type: software +authors: + - given-names: Thomas + family-names: O'Hara + - given-names: Joaquin + family-names: Bogado + orcid: 'https://orcid.org/0000-0001-9491-5698' + - given-names: Veronica + family-names: Valeros + email: valerver@fel.cvut.cz + affiliation: >- + Stratosphere Laboratory, AIC, FEL, Czech + Technical University in Prague + orcid: 'https://orcid.org/0000-0003-2554-3231' + - given-names: Sebastian + family-names: Garcia + email: garciseb@fel.cvut.cz + affiliation: >- + Stratosphere Laboratory, AIC, FEL, Czech + Technical University in Prague + orcid: 'https://orcid.org/0000-0001-6238-9910' \ No newline at end of file From 31fb739898805e1dc697b6226b9e888c34aa648f Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Wed, 23 Oct 2024 16:32:02 +0200 Subject: [PATCH 07/12] Ignore .venv folders --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index ec826dc..a6fb424 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,7 @@ .env *__pycache__* src/data/__pycache__/ -edna-run.sh -automatic_run.sh +*-run.sh venv/ +*.venv/ *.swp From 067dcca7da7ce116ac5346788158d30390752185 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 27 Oct 2024 15:45:17 +0100 Subject: [PATCH 08/12] Decouple AIP from Conda (#42) * File no longer needed * File not updated and not needed * Add dockerignore * Remove unused production requirements * Update to use venv * New multi-stage slim dockerfile * Not needed anymore --- .dockerignore | 33 ++++++++++++++++ environment.yml | 7 ---- etc/docker/Dockerfile | 75 +++++++++++++++++++++---------------- etc/docker/Dockerfile_MacM1 | 59 ----------------------------- etc/docker/entrypoint.sh | 9 ++--- requirements.txt | 4 -- version.py | 10 ----- 7 files changed, 79 insertions(+), 118 deletions(-) create mode 100644 .dockerignore delete mode 100644 environment.yml delete mode 100644 etc/docker/Dockerfile_MacM1 delete mode 100644 version.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..20b2a98 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,33 @@ +venv/ +env/ +*.pyc +*.pyo +*.pyd +__pycache__/ +*.so +*.egg +*.egg-info/ +.eggs/ +.git/ +.gitignore +.DS_Store +Thumbs.db +*.log +*.swp +*.tmp +build/ +dist/ +*.egg-info/ +*.tar.gz +*.zip +node_modules/ +.idea/ +.vscode/ +*.sublime-project +*.sublime-workspace +.dockerignore +Dockerfile +Dockerfile_MacM1 +images/ +.github +data/ diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 94e800d..0000000 --- a/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: aip - -dependencies: - - python=3.11 - - pip - - pip: - - -r requirements.txt diff --git a/etc/docker/Dockerfile b/etc/docker/Dockerfile index 8688c1c..1571d59 100644 --- a/etc/docker/Dockerfile +++ b/etc/docker/Dockerfile @@ -1,59 +1,68 @@ -FROM python:3.12-slim -LABEL org.opencontainers.image.title="AIP" \ - org.opencontainers.image.description="This image runs the AIP framework for blocklist generation." \ - org.opencontainers.image.version="0.1.0" \ - org.opencontainers.image.created="2023-08-01" \ - org.opencontainers.image.source="https://github.com/stratosphereips/AIP" \ - org.opencontainers.image.source="Joaquin Bogado " \ - org.opencontainers.image.authors="Veronica Valeros " - +FROM python:3.12-slim AS builder -# Define arguments for username, UID, and GID +# Create a non-root user ARG username=aip ARG uid=1000 ARG gid=1000 -# Set environment variables based on these arguments ENV USER=$username ENV UID=$uid ENV GID=$gid ENV HOME=/home/$USER -# Create a group and user based on the UID and GID +RUN apt-get update && \ + apt-get install -y python3-venv && \ + rm -rf /var/lib/apt/lists/* + RUN groupadd -g $GID $USER && \ useradd -m -u $UID -g $GID -s /bin/bash $USER -COPY etc/docker/entrypoint.sh /usr/local/bin/ -RUN chmod u+x /usr/local/bin/entrypoint.sh - # Switch to the non-root user USER $USER -ENV PATH="$HOME/miniconda3/bin:$PATH" -ENV ENV_PREFIX=$HOME/env +WORKDIR $HOME/AIP + +COPY requirements.txt . -# Conda installation and setup -RUN python -c "import urllib.request; urllib.request.urlretrieve('https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh', '$HOME/miniconda.sh')" && \ - bash ~/miniconda.sh -b -p $HOME/miniconda3 && \ - rm ~/miniconda.sh +RUN python -m venv venv && \ + ./venv/bin/pip install --no-cache-dir -r requirements.txt -RUN conda init bash +# Remove unnecessary files +RUN find venv/ -type d -name '__pycache__' -exec rm -rf {} + +RUN find venv/ -type d -name 'tests' -exec rm -rf {} + && \ + find venv/ -type d -name '*.dist-info' -exec rm -rf {} + -# Set the working directory -WORKDIR $HOME/AIP +# Stage 2: Final stage +FROM python:3.12-slim + +# Create a non-root user +ARG username=aip +ARG uid=1000 +ARG gid=1000 -COPY environment.yml requirements.txt $HOME/AIP/ +ENV USER=$username +ENV UID=$uid +ENV GID=$gid +ENV HOME=/home/$USER + +RUN groupadd -g $GID $USER && \ + useradd -m -u $UID -g $GID -s /bin/bash $USER -RUN conda update --name base conda -RUN conda env create --file environment.yml && \ - conda clean --all --yes +# Copy the entrypoint script +COPY etc/docker/entrypoint.sh /usr/local/bin/ +RUN chmod u+x /usr/local/bin/entrypoint.sh + +# Switch to the non-root user +USER $USER + +WORKDIR $HOME/AIP -# Copy application -COPY . . +# Copy venv from the builder stage +COPY --from=builder $HOME/AIP/venv $HOME/AIP/venv -# Dynamically link aip to the correct site-packages folder -RUN ln -s $HOME/AIP/lib/aip $(conda run -n aip python -c "import site; print(site.getsitepackages()[0])")/aip +# Copy aip files +COPY --chown=$USER:$USER . . -RUN echo 'conda activate aip' >> $HOME/.bashrc +ENV PATH="$HOME/AIP/venv/bin:$PATH" ENTRYPOINT [ "/usr/local/bin/entrypoint.sh" ] diff --git a/etc/docker/Dockerfile_MacM1 b/etc/docker/Dockerfile_MacM1 deleted file mode 100644 index 8735353..0000000 --- a/etc/docker/Dockerfile_MacM1 +++ /dev/null @@ -1,59 +0,0 @@ -FROM python:3.12-slim -LABEL org.opencontainers.image.title="AIP" \ - org.opencontainers.image.description="This image runs the AIP framework for blocklist generation." \ - org.opencontainers.image.version="0.1.0" \ - org.opencontainers.image.created="2023-08-01" \ - org.opencontainers.image.source="https://github.com/stratosphereips/AIP" \ - org.opencontainers.image.source="Joaquin Bogado " \ - org.opencontainers.image.authors="Veronica Valeros " - - -# Define arguments for username, UID, and GID -ARG username=aip -ARG uid=1000 -ARG gid=1000 - -# Set environment variables based on these arguments -ENV USER=$username -ENV UID=$uid -ENV GID=$gid -ENV HOME=/home/$USER - -# Create a group and user based on the UID and GID -RUN groupadd -g $GID $USER && \ - useradd -m -u $UID -g $GID -s /bin/bash $USER - -COPY etc/docker/entrypoint.sh /usr/local/bin/ -RUN chmod u+x /usr/local/bin/entrypoint.sh - -# Switch to the non-root user -USER $USER -ENV PATH="$HOME/miniconda3/bin:$PATH" -ENV ENV_PREFIX=$HOME/env - - -# Conda installation and setup -RUN python -c "import urllib.request; urllib.request.urlretrieve('https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-aarch64.sh', '$HOME/miniconda.sh')" && \ - bash ~/miniconda.sh -b -p $HOME/miniconda3 && \ - rm ~/miniconda.sh - -RUN conda init bash - -# Set the working directory -WORKDIR $HOME/AIP - -COPY environment.yml requirements.txt $HOME/AIP/ - -RUN conda update --name base conda -RUN conda env create --file environment.yml && \ - conda clean --all --yes - -# Copy application -COPY . . - -# Dynamically link aip to the correct site-packages folder -RUN ln -s $HOME/AIP/lib/aip $(conda run -n aip python -c "import site; print(site.getsitepackages()[0])")/aip - -RUN echo 'conda activate aip' >> $HOME/.bashrc - -ENTRYPOINT [ "/usr/local/bin/entrypoint.sh" ] diff --git a/etc/docker/entrypoint.sh b/etc/docker/entrypoint.sh index 3e4a4b5..409a6b5 100644 --- a/etc/docker/entrypoint.sh +++ b/etc/docker/entrypoint.sh @@ -6,11 +6,10 @@ set -e # Force the command prompt to display colors export force_color_prompt=yes -# Initialize Conda in the current shell session -source $HOME/miniconda3/etc/profile.d/conda.sh +# Activate the virtual environment +source "$HOME/AIP/venv/bin/activate" -# Activate the conda environment already created in the docker -conda activate aip # Execute any command passed to the container when run -exec "$@" +PYTHONPATH="$HOME/AIP/lib:$PYTHONPATH" python "$@" + diff --git a/requirements.txt b/requirements.txt index 05ff93d..8e269bd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,6 @@ netaddr==0.8.0 -maxminddb==2.2.0 zeeklog2pandas -ipython scikit-learn -pathlib joblib python-dotenv -matplotlib pandas diff --git a/version.py b/version.py deleted file mode 100644 index b8b4d21..0000000 --- a/version.py +++ /dev/null @@ -1,10 +0,0 @@ -""" -GNU GENERAL PUBLIC LICENSE -Version 3, 29 June 2007 -Copyright (C) 2007 Free Software Foundation, Inc. -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -""" -#! /usr/local/bin/python3 - -__version__ = "2.1.0" From 080a48235a23108d4fee62299dba2114dbec15fc Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Sun, 27 Oct 2024 16:45:42 +0100 Subject: [PATCH 09/12] Improve README (#46) * Improve readme with usage and models * Link license to readme * Link Thomas' original thesis --- README.md | 54 ++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index e1e79a0..65c7994 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,55 @@ # Attacker IP Prioritization (AIP) Tool -The Attacker IP Prioritization (AIP) is a tool to generate IP blocklists based on network traffic captured from honeypot networks. Originally designed to create the blocklists for the [Stratosphere Blocklist Generation project](https://mcfp.felk.cvut.cz/publicDatasets/CTU-AIPP-BlackList/), it aims to generate an IoT-friendly blocklist. With the advent of 5G, IoT devices will be directly connected to the Internet instead of being protected by a router's firewall. Therefore we need blocklists that are small and portable and designed to block those IPs that are targeting IoT devices. The main models used to this end are the Prioritize Consistent and the Prioritize New. +The Attacker IP Prioritization (AIP) is a tool to generate efficient and economic IP blocklists based on network traffic captured from honeypot networks. +With the advent of 5G, IoT devices are directly connected often without firewall protection. Therefore we need blocklists that are small, efficient and economic. The AIP structure is shown below. -Eventually, the project evolved, aiming to test new blocklists generation models beyond the PN and PC. The actual codebase allows a fast developing and testing of those new models, providing a common interface to access the attacks from several sensors deployed on the Public Internet, and a common set of metrics to compare the output of the models. +![Description of the AIP pipeline](images/AIP_Diagram.png "AIP Tool pipeline") +## AIP Models -Given a honeypot network in your organization, it should be easy to use AIP to generate your own local blocklists based on the traffic reaching the honeypots. +Each AIP model generates its own blocklist based on a specific criteria. The main models are: -![Description of the AIP pipeline](images/AIP_Diagram.png "AIP Tool pipeline") +1. **Prioritize New (PN)** + - Focuses on IPs that are new or have not been seen frequently in previous data. + - Useful to identify emerging attackers that are starting to target a network. +2. **Prioritize Consistent (PC)** + - Focuses on IPs that have consistently attacked over time in previous data. + - Useful to identify persistent attackers that continuously target a network. +3. **Alpha** + - Provides a baseline identifying all attackers seen in the last 24 hours. + - Useful to compare the effectiveness of other models. +4. **Alpha7** + - Provides a baseline identifying all attackers seen in the last 7 days. + - Useful to further compare the effectiveness of other models. +5. **Random Forest** + - Focuses on IPs that are more likely to attack in the future. + - A more experimental approach to increase blocklist efficiency. + + +## AIP Docker + +The best way to run AIP right now is using [Docker](etc/docker/README.md). + +## Usage + +AIP will automatically attempt to run all the models using the available data. Assuming the Zeek data is located in its usual location: + +```bash +:~$ cd AIP +:~$ docker run --rm -v /opt/zeek/logs/:/home/aip/AIP/data/raw:ro -v ${PWD}/data/:/home/aip/AIP/data/:rw --name aip stratosphereips/aip:latest bin/aip +``` + +To run AIP for a specific day: +```bash +:~$ cd AIP +:~$ docker run --rm -v /opt/zeek/logs/:/home/aip/AIP/data/raw:ro -v ${PWD}/data/:/home/aip/AIP/data/:rw --name aip stratosphereips/aip:latest bin/aip YYYY-MM-DD +``` + +## License -## Docker +The Stratosphere AIP tool is licensed under [GNU General Public License v3.0](https://github.com/stratosphereips/AIP/blob/main/LICENSE). -Check the instructions on how to run the AIP using [Docker](etc/docker/README.md). +## About +This tool was developed at the Stratosphere Laboratory at the Czech Technical University in Prague. This is part of the [Stratosphere blocklist generation project](https://mcfp.felk.cvut.cz/publicDatasets/CTU-AIPP-BlackList/). -# About -This tool was developed at the Stratosphere Laboratory at the Czech Technical University in Prague. \ No newline at end of file +This tool was originally born from the bachelor thesis of Thomas O'Hara, [The Attacker IP Prioritizer: An IoT Optimized Blacklisting Algorithm (2021)](https://dspace.cvut.cz/handle/10467/96722). \ No newline at end of file From cfd8ccb1e7557870a60e509ea12132cae47132cb Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Mon, 28 Oct 2024 16:52:21 +0100 Subject: [PATCH 10/12] Add argparse to bin/aip (#51) * Refactor imports * Fix bad date reference (introduced error) * Add argparse with date and model options * Run if model selected or all * Move date funct. to utils * Create output dir on run_model to simplify logic * Add param to adjust logging level --- bin/aip | 71 +++++++++++++++++++------------------ lib/aip/utils/date_utils.py | 14 ++++++++ 2 files changed, 51 insertions(+), 34 deletions(-) create mode 100644 lib/aip/utils/date_utils.py diff --git a/bin/aip b/bin/aip index 3b712b5..d28055b 100755 --- a/bin/aip +++ b/bin/aip @@ -29,69 +29,72 @@ __license__ = "GPLv3" __maintainer__ = "Joaquin Bogado" __version__ = "1.0.0" +import argparse import logging -import pandas as pd - -from aip.data.access import data_path, project_dir +from datetime import date +from os import makedirs +from os import path +from aip.data.access import data_path from aip.models.alpha import Alpha from aip.models.prioritize import New from aip.models.prioritize import Consistent from aip.models.prioritize import RandomForest -from os import makedirs, path, scandir -from datetime import date, timedelta, datetime -import sys - +from aip.utils.date_utils import validate_and_convert_date -def validate_and_convert_date(date_str): - """ - Validates a date string in 'YYYY-MM-DD' format and converts it to a date object. - """ - try: - dateobj = datetime.strptime(date_str, '%Y-%m-%d') - return dateobj.date() - except ValueError as e: - print('Invalid date format. It should be YYYY-MM-DD') - raise e -def run_model(aip_model_name, aip_output_dir, aip_model, date_day): +def run_model(aip_model_name, aip_model, date_day): """ Run a given model with exception handling """ blocklist="" + model_output_dir = path.join(data_path,'output',aip_model_name) # Make sure output directory is created - if not path.exists(aip_output_dir): - makedirs(aip_output_dir) + if not path.exists(model_output_dir): + makedirs(model_output_dir) try: - blocklist = aip_model.run(day) - blocklist.to_csv(path.join(aip_output_dir, f'AIP-{aip_model_name}-{str(date_day)}.csv.gz'), index=False, compression='gzip') + blocklist = aip_model.run(date_day) + blocklist.to_csv(path.join(model_output_dir, f'AIP-{aip_model_name}-{str(date_day)}.csv.gz'), index=False, compression='gzip') logging.info(f"{aip_model_name} model completed successfully.") except Exception as e: logging.error(f"Error running {aip_model_name} model: {e}", exc_info=True) -if __name__ == '__main__': - if len(sys.argv) == 2: - datestr = sys.argv[1] - day = validate_and_convert_date(datestr) - else: - day = date.today() +def main(): + parser = argparse.ArgumentParser(description='Attacker IP Prioritization (AIP) Tool') + parser.add_argument('--date', type=str, help='The date for running the models in YYYY-MM-DD format. Defaults to today.', default=str(date.today())) + parser.add_argument('--model', type=str, choices=['Alpha', 'Alpha7', 'Prioritize_New', 'Prioritize_Consistent', 'Random_Forest', 'all'], default='all', help='Select AIP model to run. Defaults to all.') + parser.add_argument('-d', '--debug', required=False, help="Debugging mode.", action="store_const", dest="log_level", const=logging.DEBUG, default=logging.ERROR,) + parser.add_argument('-v', '--verbose', required=False, help="Verbose mode", action="store_const", dest="log_level", const=logging.INFO,) + + args = parser.parse_args() # Set up logging log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' - logging.basicConfig(level=logging.DEBUG, format=log_fmt) + logging.basicConfig(level=args.log_level, format=log_fmt) + + # Validate input date + run_date_day = validate_and_convert_date(args.date) # Run Alpha Model - run_model('Alpha', path.join(data_path, 'output', 'Alpha'), Alpha(), day) + if args.model in ['Alpha', 'all']: + run_model('Alpha', Alpha(), run_date_day) # Alpha 7 Model - run_model('Alpha7', path.join(data_path, 'output', 'Alpha7'), Alpha(lookback=7), day) + if args.model in ['Alpha7', 'all']: + run_model('Alpha7', Alpha(lookback=7), run_date_day) # Prioritize New Model - run_model('Prioritize_New', path.join(data_path, 'output', 'Prioritize_New'), New(), day) + if args.model in ['Prioritize_New', 'all']: + run_model('Prioritize_New', New(), run_date_day) # Prioritize Consistent Model - run_model('Prioritize_Consistent', path.join(data_path, 'output', 'Prioritize_Consistent'), Consistent(), day) + if args.model in ['Prioritize_Consistent', 'all']: + run_model('Prioritize_Consistent', Consistent(), run_date_day) # Prioritize Random Forest Model - run_model('Random_Forest', path.join(data_path, 'output', 'random_forest'), RandomForest(), day) + if args.model in ['Random_Forest', 'all']: + run_model('Random_Forest', RandomForest(), run_date_day) + +if __name__ == '__main__': + main() diff --git a/lib/aip/utils/date_utils.py b/lib/aip/utils/date_utils.py new file mode 100644 index 0000000..6cdf829 --- /dev/null +++ b/lib/aip/utils/date_utils.py @@ -0,0 +1,14 @@ +import logging +from datetime import datetime + + +def validate_and_convert_date(date_str): + """ + Validates a date string in 'YYYY-MM-DD' format and converts it to a date object. + """ + try: + dateobj = datetime.strptime(date_str, '%Y-%m-%d') + return dateobj.date() + except ValueError as e: + logging.error(f"Invalid date format for '{date_str}', expected YYYY-MM-DD") + raise ValueError(f"Invalid date format: {date_str}, expected YYYY-MM-DD") from e \ No newline at end of file From ad774a9c2d7153ffd924487a4f66223b939c7711 Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Tue, 29 Oct 2024 10:11:23 +0100 Subject: [PATCH 11/12] Bring back tests to AIP (#54) * Add date_utils tests * Update ignore files --- .dockerignore | 1 + .gitignore | 4 +- tests/__init__.py | 0 tests/test_lib_aip_utils_date_utils.py | 52 ++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/test_lib_aip_utils_date_utils.py diff --git a/.dockerignore b/.dockerignore index 20b2a98..13a8cad 100644 --- a/.dockerignore +++ b/.dockerignore @@ -31,3 +31,4 @@ Dockerfile_MacM1 images/ .github data/ +tests/ diff --git a/.gitignore b/.gitignore index a6fb424..42b02e2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ .idea/ .env -*__pycache__* -src/data/__pycache__/ +__pycache__/ *-run.sh venv/ *.venv/ *.swp + diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_lib_aip_utils_date_utils.py b/tests/test_lib_aip_utils_date_utils.py new file mode 100644 index 0000000..79a663c --- /dev/null +++ b/tests/test_lib_aip_utils_date_utils.py @@ -0,0 +1,52 @@ +import unittest +import logging +from datetime import date +from lib.aip.utils.date_utils import validate_and_convert_date + + +# Suppress logging messages below CRITICAL level +# to just get the result of the tests. +logging.disable(logging.CRITICAL) + +class TestValidateAndConvertDate(unittest.TestCase): + + def test_valid_date(self): + # Test with a valid date + self.assertEqual(validate_and_convert_date("2024-10-27"), date(2024, 10, 27)) + + def test_empty_string(self): + # Test with an empty string + with self.assertRaises(ValueError): + validate_and_convert_date("") + + def test_invalid_format(self): + # Test with various invalid formats + with self.assertRaises(ValueError): + validate_and_convert_date("2024/10/27") + + with self.assertRaises(ValueError): + validate_and_convert_date("27-10-2024") + + with self.assertRaises(ValueError): + validate_and_convert_date("October 27, 2024") + + def test_nonexistent_date(self): + # Test not existing date Feb 30th + with self.assertRaises(ValueError): + validate_and_convert_date("2024-02-30") + + # Test not existing month 13 + with self.assertRaises(ValueError): + validate_and_convert_date("2024-13-01") + + def test_none_value(self): + # Test with None as input + with self.assertRaises(TypeError): + validate_and_convert_date(None) + + def test_edge_case(self): + # Test leap years + self.assertEqual(validate_and_convert_date("2024-02-29"), date(2024, 2, 29)) + +if __name__ == '__main__': + unittest.main() From 5c60f8012bad707fe699005945ac4291af6f1ddf Mon Sep 17 00:00:00 2001 From: Veronica Valeros Date: Tue, 29 Oct 2024 10:49:45 +0100 Subject: [PATCH 12/12] Update contributing with test section (#55) --- .github/CONTRIBUTING.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 87a1a8e..ad7eae3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -20,6 +20,10 @@ to follow when contributing: - refactor-<>: pull request branch, contains code refactoring, +## Tests + +Our project uses `unittest` for testing. To ensure code quality and maintainability, please run all tests before opening a pull request. + ## Creating a pull request Commits: