Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handle missing files #36

Merged
merged 16 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bin/aip
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,22 @@ from aip.models.alpha import Alpha
from aip.models.prioritize import New
from aip.models.prioritize import Consistent
from aip.models.prioritize import RandomForest
from pathlib import Path
from os import makedirs, path, scandir
from datetime import date, timedelta, datetime
import sys


def validate_and_convert_date(date_str):
    """
    Validate a date string in 'YYYY-MM-DD' format and convert it to a date object.

    Args:
        date_str: Date text expected to match the '%Y-%m-%d' format.

    Returns:
        The corresponding datetime.date.

    Raises:
        ValueError: If date_str does not match 'YYYY-MM-DD'. A hint is printed
            before the exception is propagated to the caller.
    """
    try:
        return datetime.strptime(date_str, '%Y-%m-%d').date()
    except ValueError:
        print('Invalid date format. It should be YYYY-MM-DD')
        # Bare raise re-raises the active exception without rebinding it.
        raise

#project_dir = Path(__file__).resolve().parents[1]

if __name__ == '__main__':
if len(sys.argv) == 2:
Expand All @@ -59,12 +61,10 @@ if __name__ == '__main__':
else:
day = date.today()


log_fmt = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
#logging.basicConfig(level=logging.INFO, format=log_fmt)
logging.basicConfig(level=logging.DEBUG, format=log_fmt)


#Alpha Model
output_dir = path.join(project_dir, 'data', 'output', 'Alpha')
if not path.exists(output_dir):
Expand Down
10 changes: 9 additions & 1 deletion etc/docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
#!/bin/bash --login

# Exit immediately if any command exits with a non-zero status
set -e

# Force the command prompt to display colors
export force_color_prompt=yes

# Initialize Conda in the current shell session
# (the path is quoted so a $HOME containing spaces is not word-split)
source "$HOME/miniconda3/etc/profile.d/conda.sh"

# Activate the conda environment already created in the docker
conda activate aip

# Execute any command passed to the container when run
exec "$@"
8 changes: 8 additions & 0 deletions lib/aip/data/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,14 @@ def _get_honeypot_ips(for_date=None):
'''
Filter to those honeypots that were active on the date for_date, if the honeypot file contains operation dates.
'''
logger = logging.getLogger(__name__)
# Check if the file exists before attempting to read it
honeypot_public_ips = path.join(project_dir, 'data', 'external', 'honeypots_public_ips.csv')

if not path.exists(honeypot_public_ips):
logger.error(f"File 'honeypot_public_ips.csv' does not exist. Raising error.")
raise FileNotFoundError("Required file 'honeypots_public_ips.csv' does not exist.")

honeypots = pd.read_csv(path.join(project_dir, 'data', 'external', 'honeypots_public_ips.csv'), comment='#')
if for_date is not None:
for_date = pd.to_datetime(for_date)
Expand Down
13 changes: 12 additions & 1 deletion lib/aip/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
__version__ = "0.0.1"

import pandas as pd
import logging

from aip.data.access import data_dir
from aip.utils.autoload import register, models
Expand All @@ -36,9 +37,19 @@ class BaseModel():
Template class for AIP models
'''
def __init__(self):
    '''
    Initialize the logger, an empty blocklist and the do-not-block list.

    The do-not-block list is read from 'external/do_not_block_these_ips.csv'
    under data_dir when that file exists. When it is missing, a warning is
    logged and an empty DataFrame with a single 'ip' column is used instead,
    so a missing file does not abort model construction.
    '''
    # Set up the logger for the class
    self.logger = logging.getLogger(self.__class__.__name__)

    # Model initialization and configuration
    self.blocklist = pd.DataFrame()
    exclude_ips = path.join(data_dir, 'external', 'do_not_block_these_ips.csv')

    # Only read the file after confirming it exists; an unconditional read
    # here would raise before the fallback below could ever be reached.
    if path.exists(exclude_ips):
        self.donotblocklist = pd.read_csv(exclude_ips)
    else:
        self.logger.warning("File 'do_not_block_these_ips.csv' does not exist. Initializing with empty DataFrame.")
        self.donotblocklist = pd.DataFrame(columns=['ip'])

def sanitize(self, blocklist=None):
if blocklist is None:
Expand Down