diff --git a/prospector/evaluation/analyse.py b/prospector/evaluation/analyse.py index c1db5590e..4104be1a1 100644 --- a/prospector/evaluation/analyse.py +++ b/prospector/evaluation/analyse.py @@ -80,7 +80,7 @@ def analyse_prospector_reports(filename: str, selected_cves: str): # Keep track of the CVEs where there is no report file reports_not_found = [] - #### Data to insert into table + # Data to insert into table if BATCH in ["regular", "old_code"]: results = { "high": [], @@ -620,7 +620,7 @@ def generate_checkmarks_table(input_dataset: str, selected_cves): rule_checks = {rule: "" for rule in all_rules} for r in matched_rules: - rule_checks[r] = "\checkmark" + rule_checks[r] = "\\checkmark" row.extend([rule_checks[r] for r in all_rules]) row.extend([str(overall_exectime), str(llm_exectime)]) @@ -785,9 +785,7 @@ def generate_sankey_diagram(file1: str, file2: str, file3: str): height=800, ) - output_file = ( - ANALYSIS_RESULTS_PATH + f"sankey-{file1}-{file2}-{file3}.png" - ) + output_file = ANALYSIS_RESULTS_PATH + f"sankey-{file1}-{file2}-{file3}.png" # Save as PNG write_image(fig, output_file) print(f"Sankey diagram saved to {output_file}") diff --git a/prospector/evaluation/analyse_statistics.py b/prospector/evaluation/analyse_statistics.py index ab8181c4f..edfc566cd 100644 --- a/prospector/evaluation/analyse_statistics.py +++ b/prospector/evaluation/analyse_statistics.py @@ -63,16 +63,6 @@ def analyse_statistics(filename: str): # noqa: C901 avg_cc_time = sum(cc_times) / len(cc_times) avg_total_cc_time = sum(total_cc_times) / len(total_cc_times) - # How many commits was the commit classification rule applied to? 
- for itm in dataset: - filepath = PROSPECTOR_REPORTS_PATH_HOST + filename + f"/{itm[0]}.json" - try: - cc_num_commits = _get_cc_num_commits(filepath) - break - - except FileNotFoundError: - continue - execution_data = { "timestamp": datetime.now().strftime("%H:%M:%S"), "total_files_found": len(repo_times), diff --git a/prospector/evaluation/cloning_repos.py b/prospector/evaluation/cloning_repos.py deleted file mode 100644 index 99581741a..000000000 --- a/prospector/evaluation/cloning_repos.py +++ /dev/null @@ -1,30 +0,0 @@ -from evaluation.utils import load_dataset -from git.git import clone_repo_multiple - - -# Get the URLs from d63.csv -> set -urls = set() -dataset = load_dataset( - "/home/i748376/prospector/project-kb/prospector/evaluation/data/input/d63.csv" -) - -for cve_record in dataset: - urls.add(cve_record[1]) - -urls = list(urls) -urls = [ - "https://github.com/hueniverse/undefsafe", -] - -print(f"Retrieved {len(urls)} distinct repositories from the dataset.") - -# Call clone_repo_multiple() on this set -results = clone_repo_multiple( - urls, - output_folder="/home/i748376/data/gitcache", - skip_existing=False, - shallow=False, - concurrent=1, -) - -print("Cloning completed. 
Results: ", results) diff --git a/prospector/evaluation/compare.py b/prospector/evaluation/compare.py deleted file mode 100644 index 76da0a928..000000000 --- a/prospector/evaluation/compare.py +++ /dev/null @@ -1,266 +0,0 @@ -# Compare a list of Prospector JSON reports to their counterparts -# - to find out which reports have changed since D63 and what the difference is -from datetime import datetime -import json -import os -from evaluation.utils import ( - ANALYSIS_RESULTS_PATH, - load_dataset, - logger, - config, - load_json_file, -) - - -def is_same_report(report1, report2) -> bool: - json1 = load_json_file(report1) - json2 = load_json_file(report2) - return json1 == json2 - - -def has_candidates(path: str) -> bool: - report = load_json_file(path) - return len(report["commits"]) > 0 - - -def is_first_candidate_same(path1: str, path2: str) -> bool: - report1 = load_json_file(path1) - report2 = load_json_file(path2) - - if not has_candidates(path1) and has_candidates(path2): - return True - - if not has_candidates(path1) or not has_candidates(path2): - return False - - id1 = report1["commits"][0]["commit_id"] - id2 = report2["commits"][0]["commit_id"] - - same = id1 == id2 - - if not same and report1["commits"][0]["twins"]: - # Check if they are twins - twins_report1 = [twin[1] for twin in report1["commits"][0]["twins"]] - if id2 in twins_report1: - same = True - - return same - - -def references_are_same(path1: str, path2: str) -> bool: - report1 = load_json_file(path1) - report2 = load_json_file(path2) - - return ( - report1["advisory_record"]["references"] - == report2["advisory_record"]["references"] - ) - - -def candidate_in_both(path1: str, path2: str) -> bool: - report1 = load_json_file(path1) - report2 = load_json_file(path2) - - report2_candidates = [commit["commit_id"] for commit in report2["commits"]] - if report1["commits"][0]["commit_id"] in report2_candidates: - return True - - return False - - -def tags_are_same(path1: str, path2: str) -> bool: - 
report1 = load_json_file(path1) - report2 = load_json_file(path2) - - id_first_candidate1 = report1["commits"][0]["commit_id"] - tags_first_candidate1 = report1["commits"][0]["tags"] - - for commit in report2["commits"]: - if commit["commit_id"] == id_first_candidate1: - return tags_first_candidate1 == commit["tags"] - - return False - - -def main(): - directory1 = config.compare_directory1 - directory2 = config.compare_directory2 - - logger.info(f"Comparing reports in {directory1} and {directory2}.") - - file = "evaluation/data/input/d63.csv" - dataset = load_dataset(file) - - ## Things to measure - counterpart_exists = [] - missing_in_directory1 = [] - missing_in_directory2 = [] - - missing_in_1_compared_to_gt = [] - missing_in_2_compared_to_gt = [] - - entirely_same = [] - same_references = [] - same_first_candidate = [] - different_first_candidate = [] - has_no_candidates = [] - - # Different first candidate - dfc_references = [] - dfc_first_candidate_not_in_counterpart = [] - dfc_not_in_counterpart_despite_same_references = [] - dfc_not_in_counterpart_despite_same_tags = [] - dfc_tags_and_refs = [] - dfc_only_tags = [] - - # Get reports from first directory - reports1 = [f for f in os.listdir(directory1)] - # get reports from second directory - reports2 = [f for f in os.listdir(directory2)] - - # Get how many reports are missing compared to the ground truth - for report in dataset: - if f"{report[0]}.json" not in reports1: - missing_in_1_compared_to_gt.append(report[0]) - - if f"{report[0]}.json" not in reports2: - missing_in_2_compared_to_gt.append(report[0]) - - for report in reports1: - if report not in reports2: - missing_in_directory2.append(report) - continue - - counterpart_exists.append(report) - reports2.remove(report) - - if is_same_report(directory1 + report, directory2 + report): - entirely_same.append(report) - same_references.append(report) - same_first_candidate.append(report) - continue - - if is_first_candidate_same(directory1 + report, 
directory2 + report): - same_first_candidate.append(report) - continue - - # Reports have different first candidates - different_first_candidate.append(report) - - # because of different references - if not references_are_same(directory1 + report, directory2 + report): - dfc_references.append(report) - - # because one of the reports has no ranked candidates - if not has_candidates(directory1 + report): - has_no_candidates.append((report, "directory 1")) - continue - elif not has_candidates(directory2 + report): - has_no_candidates.append((report, "directory 2")) - continue - - if not candidate_in_both(directory1 + report, directory2 + report): - dfc_first_candidate_not_in_counterpart.append(report) - if report not in dfc_references: - dfc_not_in_counterpart_despite_same_references.append(report) - elif report not in (dfc_tags_and_refs + dfc_only_tags): - dfc_not_in_counterpart_despite_same_tags.append(report) - continue - - # because of different tags - if not tags_are_same(directory1 + report, directory2 + report): - if report in dfc_references: - dfc_tags_and_refs.append(report) - else: - dfc_only_tags.append(report) - continue - - print(report) - - missing_in_directory1 = reports2 - - # Prepare results - results = { - "timestamp": datetime.now().strftime("%d-%m-%Y, %H:%M"), - "directory1": directory1, - "directory2": directory2, - "directory1_vs_gt": { - "count": len(missing_in_1_compared_to_gt), - "reports": missing_in_1_compared_to_gt, - }, - "directory2_vs_gt": { - "count": len(missing_in_2_compared_to_gt), - "reports": missing_in_2_compared_to_gt, - }, - "counterparts_exist": len(counterpart_exists), - "missing_in_directory1": { - "count": len(missing_in_directory1), - "reports": missing_in_directory1, - }, - "missing_in_directory2": { - "count": len(missing_in_directory2), - "reports": missing_in_directory2, - }, - "reports_comparison": { - "entirely_same": len(entirely_same), - "same_first_candidate": { - "count": len(same_first_candidate), - }, - 
"different_first_candidate": { - "count": len(different_first_candidate), - "reports": different_first_candidate, - "of_which_have_different_references": { - "count": len(dfc_references), - "reports": dfc_references, - }, - "of_which_have_different_tags": { - "count": len(dfc_only_tags), - "reports": dfc_only_tags, - }, - "one_report_has_no_candidates_at_all": { - "count": len(has_no_candidates), - "reports": has_no_candidates, - }, - "first_candidate_not_in_counterpart": { - "count": len(dfc_first_candidate_not_in_counterpart), - "reports": dfc_first_candidate_not_in_counterpart, - "of_which_have_same_references": { - "count": len( - dfc_not_in_counterpart_despite_same_references - ), - "reports": dfc_not_in_counterpart_despite_same_references, - }, - "of_which_have_same_tags": { - "count": len( - dfc_not_in_counterpart_despite_same_tags, - ), - "reports": dfc_not_in_counterpart_despite_same_tags, - }, - }, - }, - }, - } - - # Append results to JSON file - output_path = os.path.join(ANALYSIS_RESULTS_PATH, "reports_comparison.json") - - try: - with open(output_path, "r") as f: - existing_data = json.load(f) - - except (FileNotFoundError, json.JSONDecodeError): - existing_data = {"reports_comparison": []} - - # Append new result - existing_data["reports_comparison"].append(results) - - # Write results to JSON file - output_path = os.path.join(ANALYSIS_RESULTS_PATH, "reports_comparison.json") - with open(output_path, "w") as f: - json.dump(existing_data, f, indent=2) - - logger.info(f"Comparison results written to {output_path}") - - -if __name__ == "__main__": - main() diff --git a/prospector/evaluation/compare_reports.py b/prospector/evaluation/compare_reports.py deleted file mode 100644 index c27a0e196..000000000 --- a/prospector/evaluation/compare_reports.py +++ /dev/null @@ -1,133 +0,0 @@ -# This script compares the reports of the same CVEs for two different batches -# of reports. 
It uses the flow-analysis.json file generated by the analysis.py -# file to have a list of CVEs that are classified differently in both batches. - - -from collections import Counter -from evaluation.utils import ( - ANALYSIS_RESULTS_PATH, - logger, - config, - load_json_file, -) - - -def process_cve(cve, from_category, to_category): - is_diff_order = False - is_same_rules = False - # Find Matteo's code report and my report - try: - matteo_report = load_json_file( - f"../../../data/prospector_reports/reports_now_with_matteos_code/{cve}.json" - ) - my_report = load_json_file( - f"../../../data/prospector_reports/reports_without_llm_mvi/{cve}.json" - ) - - except Exception as e: - # print(f"Couldn't open a report: {e}") - pass - - # Get lists of the candidates - matteo_candidate_list = [ - commit["commit_id"] for commit in matteo_report["commits"] - ] - - my_candidate_list = [commit["commit_id"] for commit in my_report["commits"]] - - if _same_elements(matteo_candidate_list, my_candidate_list): - print(f"Processing: {cve}, from {from_category} to {to_category}") - print(f"Same ranked candidates for {cve}") - # Are they also ordered the same? - if matteo_candidate_list != my_candidate_list: - print(f"Same candidates, but ranked differently!") - - is_diff_order = True - print("---") - - # They are not the same candidates, the reports found different candidates - else: - # Do the first 10 candidates match the same rules? 
- matteo_relevance_scores = [ - sum([rule["relevance"] for rule in commit["matched_rules"]]) - for commit in matteo_report["commits"][:10] - ] - my_relevance_scores = [ - sum([rule["relevance"] for rule in commit["matched_rules"]]) - for commit in my_report["commits"][:10] - ] - if matteo_relevance_scores == my_relevance_scores: - print(f"Processing: {cve}, from {from_category} to {to_category}") - print( - f"First ten candidates have equal relevances for {cve}: {my_relevance_scores}" - ) - # print(f"Candidates Matteo: {matteo_candidate_list[:10]}") - is_same_rules = True - print("---") - # print(f"Candidates Me: {my_candidate_list[:10]}") - - else: - num_same, list_different = _count_same_elements( - matteo_candidate_list, my_candidate_list - ) - # print(f"{num_same} candidates are the same: {list_different}") - # print(f"{num_same} candidates are the same.") - - return is_diff_order, is_same_rules - - -def _same_elements(list1: list, list2: list): - set1 = set(list1) - set2 = set(list2) - - # Check if one set is a subset of the other - return set1.issubset(set2) or set2.issubset(set1) - - -def _count_same_elements(list1, list2): - num_different = len(set(list1) & set(list2)) - - min_length = min(len(list1), len(list2)) - - result = [] - for i in range(min_length): - if list1[i] == list2[i]: - result.append("S") - else: - result.append("D") - - return num_different, result - - -def main(): - # Get all the different CVEs from the flow analysis - flow_analysis_data = load_json_file( - "evaluation/data/results/summary_execution/flow-analysis.json" - ) - different_candidate_order = [] - different_candidates_matching_same_rules = [] - # Iterate through these CVEs - for outer_key, outer_value in flow_analysis_data.items(): - for inner_key, cve_list in outer_value.items(): - for cve in cve_list: - try: - is_diff_order, is_same_rules = process_cve( - cve, outer_key, inner_key - ) - if is_diff_order: - different_candidate_order.append(cve) - if is_same_rules: - 
different_candidates_matching_same_rules.append(cve) - except: - continue - - print( - f"Same candidates, but differently ordered: {different_candidate_order}" - ) - print( - f"Different candidates, but equivalent relevance score in first 10 candidates: {different_candidate_order}" - ) - - -if __name__ == "__main__": - main() diff --git a/prospector/evaluation/create_jobs.py b/prospector/evaluation/create_jobs.py deleted file mode 100644 index e036133f2..000000000 --- a/prospector/evaluation/create_jobs.py +++ /dev/null @@ -1,160 +0,0 @@ -import json -import sys -import time -from datetime import datetime - -import redis -import requests -from rq import Connection, Queue, get_current_job - -from backenddb.postgres import PostgresBackendDB -from core.prospector import prospector -from core.report import generate_report -from llm.llm_service import LLMService -from log.logger import logger -from util.config_parser import parse_config_file - -from evaluation.utils import ( - PROSPECTOR_REPORTS_PATH_CONTAINER, - logger, - config, -) - -prospector_config = config.prospector_settings - - -async def enqueue_jobs(): - db = connect_to_db() - processed_vulns = db.get_processed_vulns_not_in_job() - print(processed_vulns) - created_by = "Auto" - for processed_vuln in processed_vulns: - pv_id = processed_vuln["_id"] - pv_repository = processed_vuln["repository"] - pv_versions = processed_vuln["versions"] - v_vuln_id = processed_vuln["vuln_id"] - - try: - job = _create_prospector_job(v_vuln_id, pv_repository, pv_versions) - except Exception: - logger.error( - "error while creating automatically the jobs", exc_info=True - ) - - try: - db.save_job( - job.get_id(), - pv_id, - job.args, - job.created_at, - job.started_at, - job.ended_at, - job.result, - created_by, - job.get_status(refresh=True), - ) - except Exception: - logger.error( - "error while saving automatically the jobs", exc_info=True - ) - - db.disconnect() - - -def _create_prospector_job(vuln_id, repo, version, 
at_front=False): - with Connection(redis.from_url(prospector_config.redis_url)): - queue = Queue(default_timeout=800) - if at_front: - job = queue.enqueue( - _run_prospector_and_generate_report, - args=(vuln_id, repo, version), - at_front=True, - ) - else: - job = queue.enqueue( - _run_prospector_and_generate_report, - args=(vuln_id, repo, version), - ) - - return job - - -def _run_prospector_and_generate_report(vuln_id, repo_url, v_int): - job = get_current_job() - job_id = job.get_id() - url = f"{prospector_config.backend}/jobs/{job_id}" - data = { - "status": job.get_status(), - "started_at": job.started_at.isoformat(), - } - - try: - response = requests.put(url, json=data) - if response.status_code == 200: - response_object = response.json() - print(response_object) - else: - print("Error:", response.status_code) - except requests.exceptions.RequestException as e: - print("Error:", e) - - params = { - "vulnerability_id": vuln_id, - "repository_url": repo_url, - "version_interval": v_int, - "use_backend": True, - "backend_address": prospector_config.backend, - "git_cache": "/tmp/gitcache", - "limit_candidates": 2000, - "use_llm_repository_url": False, - "enabled_rules": prospector_config.enabled_rules, - } - - try: - LLMService(prospector_config.llm_service) - except Exception as e: - logger.error(f"LLM Service could not be instantiated: {e}") - raise e - - try: - results, advisory_record = prospector(**params) - generate_report( - results, - advisory_record, - "json", - f"{PROSPECTOR_REPORTS_PATH_CONTAINER}{vuln_id}.json", - prospector_params=params, - ) - status = "finished" - results = f"data_sources/reports/{vuln_id}_{job_id}" - except Exception as e: - status = "failed" - results = None - logger.error(f"job failed during execution: {e}") - finally: - end_time = datetime.now().strftime("%Y-%m-%dT%H:%M:%S.%f") - print(job_id, status, end_time, results) - data = {"status": status, "finished_at": end_time, "results": results} - try: - response = 
requests.put(url, json=data) - if response.status_code == 200: - response_object = response.json() - print(response_object) - else: - print("Error:", response.status_code) - except requests.exceptions.RequestException as e: - print("Error:", e) - - return f"data_sources/reports/{vuln_id}_{job_id}" - - -def connect_to_db(): - db = PostgresBackendDB( - prospector_config.database.user, - prospector_config.database.password, - prospector_config.database.host, - prospector_config.database.port, - prospector_config.database.dbname, - ) - db.connect() - return db diff --git a/prospector/evaluation/dispatch_jobs.py b/prospector/evaluation/dispatch_jobs.py index 6186c4e8e..62a4b7f19 100644 --- a/prospector/evaluation/dispatch_jobs.py +++ b/prospector/evaluation/dispatch_jobs.py @@ -6,7 +6,6 @@ from core.prospector import prospector from core.report import generate_report -from evaluation.create_jobs import _create_prospector_job, enqueue_jobs from evaluation.utils import ( INPUT_DATA_PATH, PROSPECTOR_REPORTS_PATH_HOST, @@ -101,7 +100,7 @@ def _run_prospector_and_generate_report( logger.error(f"prospector() crashed at {cve_id}: {e}") raise e - logger.info(f"prospector() returned. Generating report now.") + logger.info("prospector() returned. 
Generating report now.") try: generate_report( diff --git a/prospector/evaluation/extract_errors.py b/prospector/evaluation/extract_errors.py deleted file mode 100644 index 545ab0e41..000000000 --- a/prospector/evaluation/extract_errors.py +++ /dev/null @@ -1,26 +0,0 @@ -import re - -from evaluation.utils import ( - INPUT_DATA_PATH, - ANALYSIS_RESULTS_PATH, - load_dataset, -) - - -def extract_crash_lines(log_file_path, output_file_path): - crash_pattern = re.compile(r".*prospector\(\) crashed at.*") - - with open(log_file_path, "r") as log_file, open( - output_file_path, "a" - ) as output_file: - for line in log_file: - if crash_pattern.match(line): - output_file.write(line) - - -# Usage -log_file_path = f"evaluation.log" -output_file_path = f"{ANALYSIS_RESULTS_PATH}error_lines.log" - -extract_crash_lines(log_file_path, output_file_path) -print(f"Error lines have been extracted to {output_file_path}") diff --git a/prospector/rules/rules.py b/prospector/rules/rules.py index aee22fbdb..cdc1d8d64 100644 --- a/prospector/rules/rules.py +++ b/prospector/rules/rules.py @@ -452,10 +452,10 @@ def apply( r.raise_for_status() commit_data = r.json()[0] - # is_security_relevant = commit_data.get("security_relevant") - # if is_security_relevant is not None: - # candidate.security_relevant = is_security_relevant - # return is_security_relevant + is_security_relevant = commit_data.get("security_relevant") + if is_security_relevant is not None: + candidate.security_relevant = is_security_relevant + return is_security_relevant candidate.security_relevant = LLMService().classify_commit( candidate.diff, candidate.repository, candidate.message