diff --git a/README.md b/README.md index 02b216a..2a2ade2 100644 --- a/README.md +++ b/README.md @@ -192,10 +192,10 @@ options: ### Report ```bash -usage: raven report [-h] [--redis-host REDIS_HOST] [--redis-port REDIS_PORT] [--clean-redis] [--neo4j-uri NEO4J_URI] - [--neo4j-user NEO4J_USER] [--neo4j-pass NEO4J_PASS] [--clean-neo4j] - [--tag {injection,unauthenticated,fixed,priv-esc,supply-chain}] - [--severity {info,low,medium,high,critical}] [--queries-path QUERIES_PATH] [--format {raw,json}] +usage: raven report [-h] [--redis-host REDIS_HOST] [--redis-port REDIS_PORT] [--clean-redis] [--neo4j-uri NEO4J_URI] [--neo4j-user NEO4J_USER] + [--neo4j-pass NEO4J_PASS] [--clean-neo4j] + [--tag {injection,unauthenticated,fixed,priv-esc,supply-chain,best-practice,endoflife,reconnaissance}] + [--severity {info,low,medium,high,critical}] [--query_ids RQ-1,..,RQ-16] [--queries-path QUERIES_PATH] [--format {raw,json}] {slack} ... positional arguments: @@ -216,10 +216,12 @@ options: --neo4j-pass NEO4J_PASS Neo4j password, default: 123456789 --clean-neo4j, -cn Whether to clean cache, and index from scratch, default: False - --tag {injection,unauthenticated,fixed,priv-esc,supply-chain}, -t {injection,unauthenticated,fixed,priv-esc,supply-chain} + --tag {injection,unauthenticated,fixed,priv-esc,supply-chain,best-practice,endoflife,reconnaissance}, -t {injection,unauthenticated,fixed,priv-esc,supply-chain,best-practice,endoflife,reconnaissance} Filter queries with specific tag --severity {info,low,medium,high,critical}, -s {info,low,medium,high,critical} Filter queries by severity level (default: info) + --query_ids RQ-1,..,RQ-16, -id RQ-1,..,RQ-16 + Filter queries by query ids (example: RQ-2,RQ-8) --queries-path QUERIES_PATH, -dp QUERIES_PATH Queries folder (default: library) --format {raw,json}, -f {raw,json} diff --git a/deployment/test.dockerfile b/deployment/test.dockerfile index 4d03399..bb1e469 100644 --- a/deployment/test.dockerfile +++ b/deployment/test.dockerfile @@ -9,6 
+9,7 @@ RUN mkdir -p /raven/tests
 WORKDIR /raven
 COPY Makefile requirements.txt /raven/
 COPY src /raven/src
+COPY library /raven/library
 COPY tests /raven/tests
 
 # Install any needed packages specified in requirements.txt
diff --git a/src/cmdline.py b/src/cmdline.py
index d5d0e10..33c0df0 100644
--- a/src/cmdline.py
+++ b/src/cmdline.py
@@ -1,5 +1,6 @@
 import argparse
 import src.logger.log as log
+from src.common.utils import validate_query_ids
 from src.downloader.download import (
     download_all_workflows_and_actions,
     download_account_workflows_and_actions,
@@ -31,6 +32,7 @@
     REPORT_JSON_FORMAT,
     SEVERITY_LEVELS,
     QUERY_TAGS,
+    QUERY_IDS,
 )
 
 COMMAND_FUNCTIONS = {
@@ -198,6 +200,14 @@ def raven() -> None:
         choices=SEVERITY_LEVELS.keys(),
         help="Filter queries by severity level (default: info)",
     )
+    report_parser.add_argument(
+        "--query_ids",
+        "-id",
+        type=validate_query_ids,
+        default="",
+        metavar=f"RQ-1,..,{QUERY_IDS[-1]}",
+        help="Filter queries by query ids (example: RQ-2,RQ-8)",
+    )
     report_parser.add_argument(
         "--queries-path",
         "-dp",
diff --git a/src/common/utils.py b/src/common/utils.py
index c6c3671..dc20a94 100644
--- a/src/common/utils.py
+++ b/src/common/utils.py
@@ -1,3 +1,4 @@
+import argparse
 import re
 import io
 from typing import List, Dict, Union, Optional
@@ -6,7 +7,7 @@ from py2neo.data import Node
 
 
 from src.storage.redis_connection import RedisConnection
-from src.config.config import Config
+from src.config.config import Config, QUERY_IDS
 import src.logger.log as log
 from urllib.parse import urlparse, parse_qs
 
@@ -125,3 +126,31 @@ def str_to_bool(s: str) -> bool:
 
 def raw_str_to_bool(s: str) -> bool:
     return True if s == "true" else False
+
+
+def validate_query_ids(ids_arg: str) -> list:
+    """Parse and validate the comma-separated ``--query_ids`` argument.
+
+    Args:
+        ids_arg: Raw argument value, e.g. "RQ-1,RQ-3". Whitespace around
+            each id is ignored.
+
+    Returns:
+        The list of requested query ids, or an empty list when ``ids_arg``
+        is empty (no filtering by id).
+
+    Raises:
+        argparse.ArgumentTypeError: If any id is not in config.QUERY_IDS.
+    """
+    if not ids_arg:
+        return []
+
+    ids_list = [query_id.strip() for query_id in ids_arg.split(",")]
+    # Report only the offending ids so the user can spot the typo directly.
+    invalid_ids = [query_id for query_id in ids_list if query_id not in QUERY_IDS]
+    if invalid_ids:
+        raise argparse.ArgumentTypeError(
+            f"Invalid choice: {','.join(invalid_ids)}. "
+            f"Choose from {','.join(QUERY_IDS)}"
+        )
+    return ids_list
diff --git a/src/config/config.py b/src/config/config.py
index 2ae0aa2..e67159c 100644
--- a/src/config/config.py
+++ b/src/config/config.py
@@ -58,6 +58,8 @@
     "endoflife",
     "reconnaissance",
 ]
+LAST_QUERY_ID = 16
+QUERY_IDS = [f"RQ-{num}" for num in range(1, LAST_QUERY_ID + 1)]
 
 
 def load_downloader_config(args) -> None:
@@ -120,6 +122,7 @@ def load_neo4j_config(args) -> None:
 def load_reporter_config(args):
     Config.tags = args.get("tag")
     Config.severity = args.get("severity")
+    Config.query_ids = args.get("query_ids")
     Config.queries_path = args.get("queries_path")
     Config.format = args.get("format")
     Config.reporter = args.get("report_command")
@@ -164,6 +167,7 @@ class Config:
     # Report Config Constants
     tags: list = []
     severity: str = None
+    query_ids: list = []
     format: str = None
     queries_path: str = QUERIES_PATH_DEFAULT
     reporter: str = None
diff --git a/src/queries/__init__.py b/src/queries/__init__.py
index a3b2510..aeb234f 100644
--- a/src/queries/__init__.py
+++ b/src/queries/__init__.py
@@ -25,7 +25,11 @@ def __init__(
         self.result = None
 
     def filter(self) -> bool:
-        return self.filter_queries_by_tags() and self.filter_queries_by_severity()
+        return (
+            self.filter_queries_by_tags()
+            and self.filter_queries_by_severity()
+            and self.filter_queries_by_query_id()
+        )
 
     def filter_queries_by_severity(self):
         severity_level = SEVERITY_LEVELS.get(Config.severity, 0)
@@ -52,6 +56,14 @@ def filter_queries_by_tags(self):
         # skip this detection
         return False
 
+    def filter_queries_by_query_id(self) -> bool:
+        """Return True when this query passes the ``--query_ids`` filter.
+
+        An empty ``Config.query_ids`` means no id filter was requested, so
+        every query passes.
+        """
+        return not Config.query_ids or self.id in Config.query_ids
+
     def run(self) -> list:
         """
         Will run the cypher code with the given query.
diff --git a/tests/unit/test_report.py b/tests/unit/test_report.py
new file mode 100644
index 0000000..5bf7bc4
--- /dev/null
+++ b/tests/unit/test_report.py
@@ -0,0 +1,43 @@
+from pathlib import Path
+from src.config.config import LAST_QUERY_ID, QUERIES_PATH_DEFAULT
+
+query_dir = Path(__file__).parent.parent.parent / QUERIES_PATH_DEFAULT
+
+
+def test_report():
+    """Check that query ids in the library are continuous and match config.
+
+    Every query*.yml file must declare an "id: RQ-<n>" line, the ids must
+    form the sequence RQ-1..RQ-<max>, and <max> must equal LAST_QUERY_ID.
+    """
+    assert query_dir.exists(), f"Directory {query_dir} doesn't exist"
+    query_files = list(query_dir.glob("query*.yml"))
+    assert (
+        len(query_files) > 0
+    ), f"Directory {query_dir} doesn't contain any query*.yml files"
+
+    # get query ids from files in query_dir
+    query_ids = []
+    for query_file in query_files:
+        with open(query_file, "r") as f:
+            # The id is not necessarily on the first line. Iterating the file
+            # stops at EOF, so a file without an id fails the assert below
+            # instead of looping forever on empty readline() results.
+            query_id = next(
+                (line[4:].strip() for line in f if line.startswith("id: RQ-")),
+                "",
+            )
+        assert query_id, f"Query file {query_file} has no 'id: RQ-<n>' line"
+        query_ids.append(query_id)
+
+    max_id_num = max(int(query_id[3:]) for query_id in query_ids)
+
+    # sequence
+    assert set(query_ids) == set(
+        [f"RQ-{num}" for num in range(1, max_id_num + 1)]
+    ), f"Query ids in {query_dir} are not continuous from 1 to {max_id_num}: {query_ids}"
+
+    # last id in files == config.LAST_QUERY_ID
+    assert (
+        LAST_QUERY_ID == max_id_num
+    ), f"LAST_QUERY_ID in config ({LAST_QUERY_ID}) != max id in query files ({max_id_num})"