Various logging improvements, ASM summary tweaks, and scan fixes
aloftus23 committed Jun 10, 2024
1 parent b402cb0 commit 9e8166a
Showing 22 changed files with 589 additions and 289 deletions.
64 changes: 40 additions & 24 deletions src/pe_mailer/email_reports.py
@@ -28,13 +28,16 @@
import os
import re
import sys
import time
from typing import Any, Dict

# Third-Party Libraries
import boto3
from botocore.exceptions import ClientError
import docopt
import pymongo.errors
from schema import And, Schema, SchemaError, Use
import yaml

# cisagov Libraries
import pe_reports
@@ -44,7 +47,6 @@
from .pe_message import PEMessage
from .stats_message import StatsMessage

# Setup logging
LOGGER = logging.getLogger(__name__)
MAILER_AWS_PROFILE = "cool-dns-sessendemail-cyber.dhs.gov"
MAILER_ARN = os.environ.get("MAILER_ARN")
@@ -192,11 +194,11 @@ def send_message(ses_client, message, counter=None):


def send_pe_reports(ses_client, pe_report_dir, to):
"""
Send out Posture and Exposure reports.
"""Send out Posture and Exposure reports.
Parameters
----------
ses_client : boto3.client
The boto3 SES client via which the message is to be sent.
@@ -227,22 +229,25 @@ def send_pe_reports(ses_client, pe_report_dir, to):
try:
# The directory must contain one usable report
cyhy_agencies = len(pe_orgs)
LOGGER.info(f"{cyhy_agencies} agencies found in P&E.")
LOGGER.info(f"Running report mailer for {cyhy_agencies} organizations")
1 / cyhy_agencies
except ZeroDivisionError:
LOGGER.critical("No report data is found in %s", pe_report_dir)
LOGGER.critical("No report data was found in %s", pe_report_dir)
sys.exit(1)
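
Note: the bare `1 / cyhy_agencies` expression above is a deliberate guard — it raises ZeroDivisionError when no organizations came back, which the except block turns into a clean exit. A more explicit sketch of the same guard (editorial, not part of this commit):

import logging
import sys

LOGGER = logging.getLogger(__name__)

def require_report_data(pe_orgs, pe_report_dir):
    """Exit cleanly when no organizations, and therefore no reports, were found."""
    if not pe_orgs:
        LOGGER.critical("No report data was found in %s", pe_report_dir)
        sys.exit(1)
    return len(pe_orgs)
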

staging_conn = connect()
# org_contacts = get_orgs_contacts(staging_conn) # old tsql ver.
org_contacts = get_orgs_contacts() # api ver.

agencies_emailed_pe_reports = 0
reports_not_mailed = 0
# Iterate over cyhy_requests, if necessary
if pe_report_dir:
for org in pe_orgs:
id = org[2]
if id == "GSEC":
LOGGER.warning(f"The PDF report for {org[2]} was intentionally set to not be mailed")
reports_not_mailed += 1
continue
if to is not None:
to_emails = to
@@ -274,9 +279,10 @@ def send_pe_reports(ses_client, pe_report_dir, to):

# At most one Cybex report and CSV should match
if len(pe_report_filenames) > 2:
LOGGER.warning("More than two PDF reports found")
LOGGER.warning(f"More than two encrypted PDF reports found for {org[2]}")
elif not pe_report_filenames:
LOGGER.error("No PDF report found")
LOGGER.warning(f"No encrypted PDF report found for {org[2]}, no report will be mailed")
reports_not_mailed += 1
continue

if pe_report_filenames:
@@ -306,10 +312,11 @@ def send_pe_reports(ses_client, pe_report_dir, to):
pe_report_filename, pe_asm_filename, report_date, id, to_emails
)

print(to_emails)
print(pe_report_filename)
print(pe_asm_filename)
print(report_date)
print("Recipient: ", to_emails)
print("Report Date: ", report_date)
print("Report File:", pe_report_filename)
print("ASM Summary File", pe_asm_filename, "\n")


try:
agencies_emailed_pe_reports = send_message(
@@ -325,7 +332,8 @@ def send_pe_reports(ses_client, pe_report_dir, to):

# Print out and log some statistics
pe_stats_string = f"Out of {cyhy_agencies} agencies with Posture and Exposure reports, {agencies_emailed_pe_reports} ({100.0 * agencies_emailed_pe_reports / cyhy_agencies:.2f}%) were emailed."
LOGGER.info(pe_stats_string)
mail_summary_log_string = f"{agencies_emailed_pe_reports}/{cyhy_agencies} reports were mailed, {reports_not_mailed}/{cyhy_agencies} reports were not mailed"
LOGGER.info(mail_summary_log_string)

return pe_stats_string

@@ -339,19 +347,20 @@ def send_reports(pe_report_dir, summary_to, test_emails):
return 1

# Assume role to use mailer
sts_client = boto3.client("sts")
assumed_role_object = sts_client.assume_role(
RoleArn=MAILER_ARN, RoleSessionName="AssumeRoleSession1"
sts_client = boto3.client('sts')
assumed_role_object=sts_client.assume_role(
RoleArn=MAILER_ARN,
RoleSessionName="AssumeRoleSession1"
)
credentials = assumed_role_object["Credentials"]
credentials=assumed_role_object['Credentials']

ses_client = boto3.client(
"ses",
ses_client = boto3.client("ses",
region_name="us-east-1",
aws_access_key_id=credentials["AccessKeyId"],
aws_secret_access_key=credentials["SecretAccessKey"],
aws_session_token=credentials["SessionToken"],
aws_access_key_id=credentials['AccessKeyId'],
aws_secret_access_key=credentials['SecretAccessKey'],
aws_session_token=credentials['SessionToken']
)
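
Both versions of this hunk follow the standard STS assume-role pattern: trade a role ARN for temporary credentials, then build the SES client from them. A self-contained sketch of that pattern (editorial; role ARN, session name, and region are placeholders):

import boto3

def get_ses_client(role_arn, region="us-east-1"):
    """Assume an IAM role and return an SES client using its temporary credentials."""
    sts = boto3.client("sts")
    creds = sts.assume_role(
        RoleArn=role_arn, RoleSessionName="MailerSession"
    )["Credentials"]
    return boto3.client(
        "ses",
        region_name=region,
        aws_access_key_id=creds["AccessKeyId"],
        aws_secret_access_key=creds["SecretAccessKey"],
        aws_session_token=creds["SessionToken"],
    )
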


# Email the summary statistics, if necessary
if test_emails is not None:
@@ -380,6 +389,8 @@ def send_reports(pe_report_dir, summary_to, test_emails):

def main():
"""Send emails."""
LOGGER.info("--- PE Report Mailing Starting ---")
start_time = time.time()
# Parse command line arguments
args: Dict[str, str] = docopt.docopt(__doc__, version=__version__)

@@ -416,7 +427,7 @@ def main():
level=log_level.upper(),
)

LOGGER.info("Sending Posture & Exposure Reports, Version : %s", __version__)
LOGGER.info("Posture & Exposure Report Mailer, Version : %s", __version__)

send_reports(
# TODO: Improve use of schema to validate arguments.
@@ -426,5 +437,10 @@ def main():
validated_args["--test-emails"],
)

end_time = time.time()
LOGGER.info(f"Execution time for PE report mailing: {str(datetime.timedelta(seconds=(end_time - start_time)))} (H:M:S)")
LOGGER.info("--- PE Report Mailing Complete ---")

# Stop logging and clean up
logging.shutdown()
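
The new timing lines format elapsed seconds with `datetime.timedelta`; the imports hunk above only adds `import time`, so `datetime` presumably comes from an import outside this diff. A standalone sketch of the pattern (editorial, not part of this commit):

import datetime
import time

start_time = time.time()
time.sleep(1.5)  # stand-in for the report-mailing work
elapsed = datetime.timedelta(seconds=(time.time() - start_time))
print(f"Execution time for PE report mailing: {elapsed} (H:M:S)")
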

47 changes: 22 additions & 25 deletions src/pe_reports/asm_generator.py
@@ -7,24 +7,25 @@
import os

# Third-Party Libraries
from PyPDF2 import PdfFileReader, PdfFileWriter
import fitz
from PyPDF2 import PdfFileReader, PdfFileWriter
import numpy as np
import pandas as pd

# from reportlab.lib.enums import TA_CENTER
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas
from reportlab.platypus import Frame, Paragraph
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.units import inch


# cisagov Libraries
from pe_reports.data.db_query import (
query_cidrs_by_org,
query_extra_ips,
query_foreign_IPs,
query_extra_ips,
query_ports_protocols,
query_roots,
query_software,
@@ -35,17 +36,14 @@
LOGGER = logging.getLogger(__name__)

BASE_DIR = os.path.abspath(os.path.dirname(__file__))
IN_FILEPATH = BASE_DIR + "/assets_asm/attack_surface_empty.pdf"
ON_PAGE_INDEX = 0
UNDERNEATH = (
False # if True, new content will be placed underneath page (painted first)
)

pdfmetrics.registerFont(TTFont("Frank_Goth", BASE_DIR + "/fonts/FranklinGothic.ttf"))
pdfmetrics.registerFont(
TTFont("Frank_Goth", BASE_DIR + "/assets_asm/FranklinGothic.ttf")
)
pdfmetrics.registerFont(
TTFont("Frank_Goth_Book", BASE_DIR + "/assets_asm/Franklin_Gothic_Book_Regular.ttf")
TTFont("Frank_Goth_Book", BASE_DIR + "/fonts/Franklin_Gothic_Book_Regular.ttf")
)
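
For context, registering a TTF with reportlab and drawing with it looks like the following sketch (editorial; font name, file paths, and coordinates are placeholders):

from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfgen import canvas

# Any TTF on disk works; the registered name is what setFont() refers to.
pdfmetrics.registerFont(TTFont("Frank_Goth", "fonts/FranklinGothic.ttf"))
can = canvas.Canvas("out.pdf")
can.setFont("Frank_Goth", 12)
can.drawString(72, 720, "Attack Surface Summary")
can.save()
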


@@ -108,7 +106,7 @@ def add_stat_frame(current_value, last_value, x, y, width, height, style, can):

def add_attachment(org_uid, final_output, pdf_file, asm_json, asm_xlsx):
"""Create and add JSON attachment."""
LOGGER.info("Creating attachment")
LOGGER.info("Creating ASM attachments")
# Create ASM Excel file
asmWriter = pd.ExcelWriter(asm_xlsx, engine="xlsxwriter")

@@ -119,16 +117,14 @@ def add_attachment(org_uid, final_output, pdf_file, asm_json, asm_xlsx):
cidr_dict = cidr_df["network"].to_list()

# Extra IPs
LOGGER.info("Getting extra IPs")
ip_lst = query_extra_ips(org_uid)
ips_df = pd.DataFrame(ip_lst, columns=["ip"])
ips_df.to_excel(asmWriter, sheet_name="Extra IPs", index=False)
ips_dict = ips_df["ip"].to_list()
LOGGER.info("Finished extra IPs")

# Ports/protocols
ports_protocols_df = query_ports_protocols(org_uid)
ports_protocols_df.to_excel(asmWriter, sheet_name="Ports_Protocols", index=False)
ports_protocols_df.to_excel(asmWriter, sheet_name="Ports Protocols", index=False)
ports_protocols_dict = ports_protocols_df.to_dict(orient="records")

# Root domains
@@ -139,9 +135,12 @@ def add_attachment(org_uid, final_output, pdf_file, asm_json, asm_xlsx):

# Sub-domains
sd_df = query_subs(org_uid)
sd_df = sd_df[["sub_domain"]]
sd_df.to_excel(asmWriter, sheet_name="Sub-domains", index=False)
sd_dict = sd_df["sub_domain"].to_list()
# sd_df = sd_df[["sub_domain"]]
#sd_df = sd_df[["sub_domain", "origin_root_domain", "pe_discovered_asset"]]
sd_df = sd_df[["sub_domain", "origin_root_domain"]]
sd_df.to_excel(asmWriter, sheet_name="Subdomains", index=False)
# sd_dict = sd_df["sub_domain"].to_list()
sd_dict = sd_df.to_dict(orient="records")
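
Each block in add_attachment writes one query result per worksheet through a shared pandas ExcelWriter; a minimal sketch of that pattern with stand-in frames (editorial, not part of this commit):

import pandas as pd

# Hypothetical frames standing in for the per-org query results.
cidrs_df = pd.DataFrame({"network": ["10.0.0.0/24", "192.0.2.0/24"]})
subs_df = pd.DataFrame(
    {"sub_domain": ["a.example.gov"], "origin_root_domain": ["example.gov"]}
)

with pd.ExcelWriter("asm_summary.xlsx", engine="xlsxwriter") as writer:
    cidrs_df.to_excel(writer, sheet_name="CIDRs", index=False)
    subs_df.to_excel(writer, sheet_name="Subdomains", index=False)
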

# Software
soft_df = query_software(org_uid)
@@ -205,9 +204,7 @@ def add_attachment(org_uid, final_output, pdf_file, asm_json, asm_xlsx):
return asm_xlsx


def create_summary(
org_uid, final_output, data_dict, file_name, json_filename, excel_filename
):
def create_summary(org_uid, final_output, data_dict, file_name, json_filename, excel_filename):
"""Create ASM summary PDF."""
packet = io.BytesIO()

@@ -310,8 +307,8 @@ def create_summary(
can,
)
json_title_frame = Frame(
6 * inch, 100, 1.5 * inch, 0.5 * inch, id=None, showBoundary=0
)
6 * inch, 100, 1.5 * inch, 0.5 * inch, id=None, showBoundary=0
)
json_title = Paragraph(
"JSON      EXCEL",
style=json_excel,
@@ -324,7 +321,7 @@ def create_summary(
new_pdf = PdfFileReader(packet)

# Read existing PDF template
existing_pdf = PdfFileReader(open(BASE_DIR + "/assets_asm/empty_asm.pdf", "rb"))
existing_pdf = PdfFileReader(open(BASE_DIR + "/assets_asm/empty_asm_2024-04-15.pdf", "rb"))
output = PdfFileWriter()

# Add the "watermark" (which is the new pdf) on the existing page
@@ -341,5 +338,5 @@ def create_summary(
asm_xlsx = add_attachment(
org_uid, final_output, file_name, json_filename, excel_filename
)

return asm_xlsx
return asm_xlsx
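
create_summary draws the dynamic content into an in-memory PDF with reportlab and stamps it onto the template page via PyPDF2's 1.x API, which this file still uses. A condensed sketch (editorial; file paths are placeholders):

import io

from PyPDF2 import PdfFileReader, PdfFileWriter
from reportlab.pdfgen import canvas

# Draw the dynamic content into an in-memory PDF.
packet = io.BytesIO()
can = canvas.Canvas(packet)
can.drawString(72, 720, "Overlay content")
can.save()
packet.seek(0)

overlay = PdfFileReader(packet)
template = PdfFileReader(open("empty_asm.pdf", "rb"))
page = template.getPage(0)
page.mergePage(overlay.getPage(0))  # stamp the overlay onto the template page

output = PdfFileWriter()
output.addPage(page)
with open("asm_summary.pdf", "wb") as out_file:
    output.write(out_file)
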
37 changes: 22 additions & 15 deletions src/pe_reports/data/db_query.py
@@ -29,8 +29,9 @@
CONN_PARAMS_DIC_STAGING = staging_config()

# These need to be filled with API key/url path in database.ini
pe_api_key = CONN_PARAMS_DIC_STAGING.get("pe_api_key")
pe_api_url = CONN_PARAMS_DIC_STAGING.get("pe_api_url")
API_DIC = staging_config(section="pe_api")
pe_api_url = API_DIC.get("pe_api_url")
pe_api_key = API_DIC.get("pe_api_key")


def task_api_call(task_url, check_url, data={}, retry_time=3):
Expand All @@ -55,24 +56,36 @@ def task_api_call(task_url, check_url, data={}, retry_time=3):
create_task_url, headers=headers, data=data
).json()
task_id = create_task_result.get("task_id")
LOGGER.info("Created task for", task_url, "query, task_id: ", task_id)
LOGGER.info("Created task for " + task_url + " query, task_id: " + task_id)
check_task_url += task_id
while task_status != "Completed" and task_status != "Failed":
# Ping task status endpoint and get status
check_task_resp = requests.get(check_task_url, headers=headers).json()
# check_task_resp = requests.get(check_task_url, headers=headers).json()
check_task_resp = requests.get(check_task_url, headers=headers)
#print(check_task_resp)
check_task_resp = check_task_resp.json()
task_status = check_task_resp.get("status")
LOGGER.info("\tPinged", check_url, "status endpoint, status:", task_status)
LOGGER.info(
"\tPinged " + check_url + " status endpoint, status: " + task_status
)
time.sleep(retry_time)
except requests.exceptions.HTTPError as errh:
LOGGER.error(errh)
print(errh)
except requests.exceptions.ConnectionError as errc:
LOGGER.error(errc)
print(errc)
except requests.exceptions.Timeout as errt:
LOGGER.error(errt)
print(errt)
except requests.exceptions.RequestException as err:
LOGGER.error(err)
print(err)
except json.decoder.JSONDecodeError as err:
LOGGER.error(err)
print(err)
except Exception as err:
print(err)
# Once task finishes, return result
if task_status == "Completed":
return check_task_resp.get("result")
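
task_api_call implements a create-then-poll workflow against the P&E API; stripped to its core, the pattern looks like this sketch (editorial; endpoint and response field names are assumptions):

import time

import requests

def poll_task(create_url, status_url, headers, retry_time=3):
    """Create an API task, then poll its status endpoint until it finishes."""
    task_id = requests.post(create_url, headers=headers).json()["task_id"]
    status, resp = None, {}
    while status not in ("Completed", "Failed"):
        resp = requests.get(status_url + task_id, headers=headers).json()
        status = resp.get("status")
        time.sleep(retry_time)
    return resp.get("result") if status == "Completed" else None
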
@@ -2080,7 +2093,6 @@ def query_subs(org_uid):
Return:
All the subdomains belonging to the specified org as a dataframe
"""
start_time = time.time()
total_num_pages = 1
page_num = 1
total_data = []
@@ -2100,21 +2112,16 @@ def query_subs(org_uid):
page_num += 1
# Once all data has been retrieved, return overall dataframe
total_data = pd.DataFrame.from_dict(total_data)
LOGGER.info(
"Total time to retrieve all subdomains for this org: "
+ str(time.time() - start_time)
)
# Process data and return
total_data.rename(
columns={
"root_domain_uid_id": "root_domain_uid",
"data_source_uid_id": "data_source_uid",
"dns_record_uid_id": "dns_record_uid",
"root_domain_uid__root_domain": "origin_root_domain",
"identified": "pe_discovered_asset",
},
inplace=True,
)
total_data["first_seen"] = pd.to_datetime(total_data["first_seen"]).dt.date
total_data["last_seen"] = pd.to_datetime(total_data["last_seen"]).dt.date
# total_data["first_seen"] = pd.to_datetime(total_data["first_seen"]).dt.date
# total_data["last_seen"] = pd.to_datetime(total_data["last_seen"]).dt.date
# Return truly empty dataframe if no results
if total_data[total_data.columns].isnull().apply(lambda x: all(x), axis=1)[0]:
total_data.drop(total_data.index, inplace=True)
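
query_subs pages through the API until total_num_pages is exhausted and collects the rows into one DataFrame; a compact sketch of the pagination loop (editorial; request and response fields are assumptions):

import pandas as pd
import requests

def fetch_all_pages(url, headers, org_uid, page_size=500):
    """Collect every page of a paginated endpoint into one DataFrame."""
    page, total_pages, rows = 1, 1, []
    while page <= total_pages:
        payload = {"org_uid": org_uid, "page": page, "per_page": page_size}
        resp = requests.post(url, headers=headers, json=payload).json()
        rows.extend(resp.get("data", []))
        total_pages = resp.get("total_pages", 1)
        page += 1
    return pd.DataFrame.from_dict(rows)
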
Binary file added src/pe_reports/fonts/FranklinGothic.ttf
