From 6fed6434e098679fcb8e750f46ace543db56775f Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Tue, 27 Feb 2018 19:00:38 -0800 Subject: [PATCH 01/20] [Style] Cleanup def chains and save a little --- pyt/definition_chains.py | 20 ++++++++------------ pyt/save.py | 36 ++++++++++++++++++------------------ 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index 6c56248e..8c56dea2 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -9,19 +9,15 @@ def get_vars(node): vv = VarsVisitor() - if isinstance(node.ast_node, ast.While)\ - or isinstance(node.ast_node, ast.If): + if isinstance(node.ast_node, (ast.If, ast.While)): vv.visit(node.ast_node.test) - elif isinstance(node.ast_node, ast.FunctionDef) or\ - isinstance(node.ast_node, ast.ClassDef): - return list() + elif isinstance(node.ast_node, (ast.ClassDef, ast.FunctionDef)): + return set() else: try: vv.visit(node.ast_node) except AttributeError: # If no ast_node - vv.result = list() - - # remove duplicates: + vv.result = set() vv.result = set(vv.result) # Filter out lvars: @@ -58,11 +54,9 @@ def build_use_def_chain(cfg_nodes): def varse(node): vv = VarsVisitor() - if isinstance(node.ast_node, ast.FunctionDef) or\ - isinstance(node.ast_node, ast.ClassDef): + if isinstance(node.ast_node, (ast.ClassDef, ast.FunctionDef)): return list() - elif isinstance(node.ast_node, ast.While)\ - or isinstance(node.ast_node, ast.If): + elif isinstance(node.ast_node, (ast.If, ast.While)): vv.visit(node.ast_node.test) else: try: @@ -84,11 +78,13 @@ def build_def_use_chain(cfg_nodes): def_use = dict() lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) + # Make an empty list for each def for node in cfg_nodes: if isinstance(node, AssignmentNode): def_use[node] = list() for node in cfg_nodes: + # Rename varse for var in varse(node): for cnode in get_constraint_nodes(node, lattice): if var in cnode.left_hand_side: diff --git a/pyt/save.py b/pyt/save.py index 5ee6ebf2..e6bda65d 100644 --- a/pyt/save.py +++ b/pyt/save.py @@ -81,28 +81,28 @@ def __exit__(self, type, value, traceback): def def_use_chain_to_file(cfg_list): with Output('def-use_chain.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### Def-use chain for CFG {} #####{}' - .format(i, os.linesep)) - def_use = build_def_use_chain(cfg.nodes) - for k, v in def_use.items(): - fd.write('Def: {} -> Use: [{}]{}' - .format(k.label, - ', '.join([n.label for n in v]), - os.linesep)) + for i, cfg in enumerate(cfg_list): + fd.write('##### Def-use chain for CFG {} #####{}' + .format(i, os.linesep)) + def_use = build_def_use_chain(cfg.nodes) + for k, v in def_use.items(): + fd.write('Def: {} -> Use: [{}]{}' + .format(k.label, + ', '.join([n.label for n in v]), + os.linesep)) def use_def_chain_to_file(cfg_list): with Output('use-def_chain.pyt') as fd: - for i, cfg in enumerate(cfg_list): - fd.write('##### Use-def chain for CFG {} #####{}' - .format(i, os.linesep)) - def_use = build_use_def_chain(cfg.nodes) - for k, v in def_use.items(): - fd.write('Use: {} -> Def: [{}]{}' - .format(k.label, - ', '.join([n[1].label for n in v]), - os.linesep)) + for i, cfg in enumerate(cfg_list): + fd.write('##### Use-def chain for CFG {} #####{}' + .format(i, os.linesep)) + def_use = build_use_def_chain(cfg.nodes) + for k, v in def_use.items(): + fd.write('Use: {} -> Def: [{}]{}' + .format(k.label, + ', '.join([n[1].label for n in v]), + os.linesep)) def cfg_to_file(cfg_list): From 7df6529b83f1fdf1a5fe91f38a0a1deedd2b43ec Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 1 Mar 2018 18:53:04 -0800 Subject: [PATCH 02/20] [blackbox stuff] wrote get_vulnerability_chains, cleaned up def-use code --- pyt/definition_chains.py | 56 +++++++++++++++++++--------------------- pyt/vulnerabilities.py | 46 ++++++++++++++++++++++++++++----- 2 files changed, 65 insertions(+), 37 deletions(-) diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index 8c56dea2..ee5c2132 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -1,6 +1,6 @@ import ast -from .base_cfg import AssignmentNode +from .base_cfg import AssignmentNode, EntryOrExitNode from .constraint_table import constraint_table from .lattice import Lattice from .reaching_definitions import ReachingDefinitionsAnalysis @@ -22,9 +22,7 @@ def get_vars(node): # Filter out lvars: for var in vv.result: - try: # if assignment node - # print('r', node.right_hand_side_variables) - # if var not in node.left_hand_side: + try: if var in node.right_hand_side_variables: yield var except AttributeError: @@ -37,56 +35,54 @@ def get_constraint_nodes(node, lattice): yield n +# TKTK: Clean up use-def too def build_use_def_chain(cfg_nodes): use_def = dict() lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) for node in cfg_nodes: definitions = list() - for cnode in get_constraint_nodes(node, lattice): + for constraint_node in get_constraint_nodes(node, lattice): for var in get_vars(node): - if var in cnode.left_hand_side: - definitions.append((var, cnode)) + if var in constraint_node.left_hand_side: + definitions.append((var, constraint_node)) use_def[node] = definitions return use_def -def varse(node): - vv = VarsVisitor() - if isinstance(node.ast_node, (ast.ClassDef, ast.FunctionDef)): - return list() - elif isinstance(node.ast_node, (ast.If, ast.While)): - vv.visit(node.ast_node.test) - else: - try: - vv.visit(node.ast_node) - except AttributeError: - return list() - +def get_uses(node): if isinstance(node, AssignmentNode): + # TKTK: Merge in master, then remove this `if` result = list() - for var in vv.result: - if var not in node.left_hand_side: - result.append(var) + if isinstance(node.right_hand_side_variables, str): + result.append(node.right_hand_side_variables) + else: + for var in node.right_hand_side_variables: + if var not in node.left_hand_side: + result.append(var) return result + elif isinstance(node, EntryOrExitNode): + return [] else: - return vv.result + raise def build_def_use_chain(cfg_nodes): def_use = dict() lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) - # Make an empty list for each def + # For every node for node in cfg_nodes: + # Make an empty list for each def if isinstance(node, AssignmentNode): def_use[node] = list() + # Get its uses + for variable in get_uses(node): + # Loop through all the nodes before it + for earlier_node in get_constraint_nodes(node, lattice): + # and add to the 'uses list' of each earlier node, when applicable + if variable in earlier_node.left_hand_side: + def_use[earlier_node].append(node) - for node in cfg_nodes: - # Rename varse - for var in varse(node): - for cnode in get_constraint_nodes(node, lattice): - if var in cnode.left_hand_side: - def_use[cnode].append(node) return def_use diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index c9edcf94..293004dc 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -10,6 +10,7 @@ RestoreNode, TaintedNode ) +from .definition_chains import build_def_use_chain from .lattice import Lattice from .right_hand_side_visitor import RHSVisitor from .trigger_definitions_parser import default_trigger_word_file, parse @@ -27,8 +28,7 @@ class TriggerNode(): - def __init__(self, trigger_word, sanitisers, cfg_node, - secondary_nodes=None): + def __init__(self, trigger_word, sanitisers, cfg_node, secondary_nodes=None): self.trigger_word = trigger_word self.sanitisers = sanitisers self.cfg_node = cfg_node @@ -271,12 +271,27 @@ def get_sink_args(cfg_node): return vv.result + other_results +def get_vulnerability_chains(current_node, sink, def_use, chain): + for use in def_use[current_node]: + if use == sink: + yield chain + else: + vuln_chain = list(chain) + vuln_chain.append(use) + yield from get_vulnerability_chains( + use, + sink, + def_use, + vuln_chain + ) + + def get_vulnerability(source, sink, triggers, lattice, trim_reassigned_in, - blackbox_assignments): + cfg): """Get vulnerability between source and sink if it exists. Uses triggers to find sanitisers. @@ -287,7 +302,7 @@ def get_vulnerability(source, triggers(Triggers): Triggers of the CFG. lattice(Lattice): The lattice we're analysing. trim_reassigned_in(bool): Whether or not the trim option is set. - blackbox_assignments(set[AssignmentNode]): used in is_unknown. + cfg(CFG): .blackbox_assignments used in is_unknown, .nodes used in build_def_use_chain Returns: A Vulnerability if it exists, else None @@ -295,12 +310,16 @@ def get_vulnerability(source, source_in_sink = lattice.in_constraint(source.cfg_node, sink.cfg_node) secondary_in_sink = list() - if source.secondary_nodes: + # When this is True and source_in_sink is not + # It is due to the secondary being a function call secondary_in_sink = [secondary for secondary in source.secondary_nodes if lattice.in_constraint(secondary, sink.cfg_node)] + if not source_in_sink: + if secondary_in_sink: + raise trigger_node_in_sink = source_in_sink or secondary_in_sink sink_args = get_sink_args(sink.cfg_node) @@ -312,7 +331,20 @@ def get_vulnerability(source, secondary_node_in_sink_args = node trimmed_reassignment_nodes = list() + if secondary_node_in_sink_args: + # TKTK: if interactive_mode + def_use = build_def_use_chain(cfg.nodes) + for chain in get_vulnerability_chains( + source.cfg_node, + sink.cfg_node, + def_use, + chain=[source.cfg_node] + ): + for node in chain: + if isinstance(node, BBorBInode): + print(f'Is {node.label} with tainted arg ??? vulnerable?') + trimmed_reassignment_nodes.append(secondary_node_in_sink_args) node_in_the_vulnerability_chain = secondary_node_in_sink_args # Here is where we do backwards slicing to traceback which nodes led to the vulnerability @@ -334,7 +366,7 @@ def get_vulnerability(source, triggers.sanitiser_dict, lattice) blackbox_assignment_in_chain = is_unknown(trimmed_reassignment_nodes, - blackbox_assignments) + cfg.blackbox_assignments) reassignment_nodes = source.secondary_nodes if trim_reassigned_in: reassignment_nodes = trimmed_reassignment_nodes @@ -373,7 +405,7 @@ def find_vulnerabilities_in_cfg(cfg, vulnerability_log, definitions, lattice, tr triggers, lattice, trim_reassigned_in, - cfg.blackbox_assignments) + cfg) if vulnerability: vulnerability_log.append(vulnerability) From e075e934c245d56d1959f3b48306b9e31bda3651 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 1 Mar 2018 19:10:58 -0800 Subject: [PATCH 03/20] [cleanup] remove if after merging in master that had the RHS vars fix --- pyt/definition_chains.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index ee5c2132..b6f9ed86 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -53,14 +53,10 @@ def build_use_def_chain(cfg_nodes): def get_uses(node): if isinstance(node, AssignmentNode): - # TKTK: Merge in master, then remove this `if` result = list() - if isinstance(node.right_hand_side_variables, str): - result.append(node.right_hand_side_variables) - else: - for var in node.right_hand_side_variables: - if var not in node.left_hand_side: - result.append(var) + for var in node.right_hand_side_variables: + if var not in node.left_hand_side: + result.append(var) return result elif isinstance(node, EntryOrExitNode): return [] From 12ab2e1057c0b31ea39437bcd2b75144ac20b50c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 2 Mar 2018 19:18:23 -0800 Subject: [PATCH 04/20] [blackbox stuff]Add interactive UI mode option, cleanup __main__ a bit --- pyt/__main__.py | 85 ++++++++++++++++++++++++++--------------- pyt/argument_helpers.py | 7 ++++ pyt/github_search.py | 4 +- pyt/vulnerabilities.py | 81 +++++++++++++++++++++++---------------- 4 files changed, 113 insertions(+), 64 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index c5ca05c5..6f8eae95 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -6,7 +6,7 @@ from datetime import date from pprint import pprint -from .argument_helpers import valid_date +from .argument_helpers import valid_date, UImode from .ast_helper import generate_ast from .draw import draw_cfgs, draw_lattices from .constraint_table import initialize_constraint_table, print_table @@ -37,6 +37,7 @@ verbose_cfg_to_file, vulnerabilities_to_file ) +from .trigger_definitions_parser import default_trigger_word_file from .vulnerabilities import find_vulnerabilities @@ -73,10 +74,18 @@ def parse_args(args): print_group.add_argument('-vp', '--verbose-print', help='Verbose printing of -p.', action='store_true') print_group.add_argument('-trim', '--trim-reassigned-in', - help='Trims the reassigned list to the vulnerability chain.', action='store_true') + help='Trims the reassigned list to the vulnerability chain.', + action='store_true', + default=False) + print_group.add_argument('-i', '--interactive', + help='Will ask you about each vulnerability chain and blackbox nodes.', + action='store_true', + default=False) parser.add_argument('-t', '--trigger-word-file', - help='Input trigger word file.', type=str) + help='Input trigger word file.', + type=str, + default=default_trigger_word_file) parser.add_argument('-py2', '--python-2', help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + ' needed when target file(s) are written in Python 2.', action='store_true') @@ -157,35 +166,42 @@ def parse_args(args): return parser.parse_args(args) -def analyse_repo(github_repo, analysis_type): +def analyse_repo(github_repo, analysis_type, ui_mode): cfg_list = list() project_modules = get_modules(os.path.dirname(github_repo.path)) intraprocedural(project_modules, cfg_list) initialize_constraint_table(cfg_list) analyse(cfg_list, analysis_type=analysis_type) - vulnerability_log = find_vulnerabilities(cfg_list, analysis_type) + vulnerability_log = find_vulnerabilities( + cfg_list, + analysis_type, + args.trigger_word_file, + ui_mode + ) return vulnerability_log def main(command_line_args=sys.argv[1:]): args = parse_args(command_line_args) - analysis = None + analysis = ReachingDefinitionsTaintAnalysis if args.liveness: analysis = LivenessAnalysis elif args.reaching: analysis = ReachingDefinitionsAnalysis - elif args.reaching_taint: - analysis = ReachingDefinitionsTaintAnalysis - else: - analysis = ReachingDefinitionsTaintAnalysis + + ui_mode = UImode.NORMAL + if args.interactive: + ui_mode = UImode.INTERACTIVE + elif args.trim_reassigned_in: + ui_mode = UImode.TRIM cfg_list = list() if args.git_repos: repos = get_repos(args.git_repos) for repo in repos: repo.clone() - vulnerability_log = analyse_repo(repo, analysis) + vulnerability_log = analyse_repo(repo, analysis, ui_mode) vulnerability_log.print_report() if not vulnerability_log.vulnerabilities: repo.clean_up() @@ -194,11 +210,23 @@ def main(command_line_args=sys.argv[1:]): if args.which == 'search': set_github_api_token() if args.start_date: - scan_github(args.search_string, args.start_date, - analysis, analyse_repo, args.csv_path) + scan_github( + args.search_string, + args.start_date, + analysis, + analyse_repo, + args.csv_path, + ui_mode + ) else: - scan_github(args.search_string, date(2010, 1, 1), - analysis, analyse_repo, args.csv_path) + scan_github( + args.search_string, + date(2010, 1, 1), + analysis, + analyse_repo, + args.csv_path, + ui_mode + ) exit() path = os.path.normpath(args.filepath) @@ -218,10 +246,12 @@ def main(command_line_args=sys.argv[1:]): if args.intraprocedural_analysis: intraprocedural(project_modules, cfg_list) else: - interprocedural_cfg = interprocedural(tree, - project_modules, - local_modules, - path) + interprocedural_cfg = interprocedural( + tree, + project_modules, + local_modules, + path + ) cfg_list.append(interprocedural_cfg) framework_route_criteria = is_flask_route_function if args.adaptor: @@ -238,17 +268,12 @@ def main(command_line_args=sys.argv[1:]): analyse(cfg_list, analysis_type=analysis) - vulnerability_log = None - if args.trigger_word_file: - vulnerability_log = find_vulnerabilities(cfg_list, - analysis, - args.trim_reassigned_in, - args.trigger_word_file) - else: - vulnerability_log = find_vulnerabilities(cfg_list, - analysis, - args.trim_reassigned_in) - + vulnerability_log = find_vulnerabilities( + cfg_list, + analysis, + args.trigger_word_file, + ui_mode + ) vulnerability_log.print_report() if args.draw_cfg: diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index c3685df7..1381f8f3 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -1,5 +1,6 @@ from argparse import ArgumentTypeError from datetime import datetime +from enum import Enum def valid_date(s): @@ -9,3 +10,9 @@ def valid_date(s): except ValueError: msg = "Not a valid date: '{0}'. Format: {1}".format(s, date_format) raise ArgumentTypeError(msg) + + +class UImode(Enum): + NORMAL = 0 + INTERACTIVE = 1 + TRIM = 2 diff --git a/pyt/github_search.py b/pyt/github_search.py index 0e200cd5..154242cd 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -198,7 +198,7 @@ def get_dates(start_date, end_date=date.today(), interval=7): delta.days % interval)) -def scan_github(search_string, start_date, analysis_type, analyse_repo_func, csv_path): +def scan_github(search_string, start_date, analysis_type, analyse_repo_func, csv_path, ui_mode): analyse_repo = analyse_repo_func for d in get_dates(start_date, interval=7): q = Query(SEARCH_REPO_URL, search_string, @@ -221,7 +221,7 @@ def scan_github(search_string, start_date, analysis_type, analyse_repo_func, csv save_repo_scan(repo, r.path, vulnerability_log=None, error='Other Error Unknown while cloning :-(') continue try: - vulnerability_log = analyse_repo(r, analysis_type) + vulnerability_log = analyse_repo(r, analysis_type, ui_mode) if vulnerability_log.vulnerabilities: save_repo_scan(repo, r.path, vulnerability_log) add_repo_to_csv(csv_path, r) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index b1e82e84..24626bb9 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -3,6 +3,7 @@ import ast from collections import namedtuple +from .argument_helpers import UImode from .base_cfg import ( AssignmentCallNode, AssignmentNode, @@ -286,12 +287,14 @@ def get_vulnerability_chains(current_node, sink, def_use, chain): ) -def get_vulnerability(source, - sink, - triggers, - lattice, - trim_reassigned_in, - cfg): +def get_vulnerability( + source, + sink, + triggers, + lattice, + cfg, + ui_mode +): """Get vulnerability between source and sink if it exists. Uses triggers to find sanitisers. @@ -301,8 +304,8 @@ def get_vulnerability(source, sink(TriggerNode): TriggerNode of the sink. triggers(Triggers): Triggers of the CFG. lattice(Lattice): The lattice we're analysing. - trim_reassigned_in(bool): Whether or not the trim option is set. cfg(CFG): .blackbox_assignments used in is_unknown, .nodes used in build_def_use_chain + ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. Returns: A Vulnerability if it exists, else None @@ -332,17 +335,17 @@ def get_vulnerability(source, trimmed_reassignment_nodes = list() if secondary_node_in_sink_args: - # TKTK: if interactive_mode - def_use = build_def_use_chain(cfg.nodes) - for chain in get_vulnerability_chains( - source.cfg_node, - sink.cfg_node, - def_use, - chain=[source.cfg_node] - ): - for node in chain: - if isinstance(node, BBorBInode): - print(f'Is {node.label} with tainted arg ??? vulnerable?') + if ui_mode == UImode.INTERACTIVE: + def_use = build_def_use_chain(cfg.nodes) + for chain in get_vulnerability_chains( + source.cfg_node, + sink.cfg_node, + def_use, + chain=[source.cfg_node] + ): + for node in chain: + if isinstance(node, BBorBInode): + print(f'Is {node.label} with tainted arg ??? vulnerable?') trimmed_reassignment_nodes.append(secondary_node_in_sink_args) node_in_the_vulnerability_chain = secondary_node_in_sink_args @@ -372,7 +375,7 @@ def get_vulnerability(source, cfg.blackbox_assignments ) reassignment_nodes = source.secondary_nodes - if trim_reassigned_in: + if ui_mode == UImode.TRIM: reassignment_nodes = trimmed_reassignment_nodes if sink_is_sanitised: return SanitisedVulnerability( @@ -397,7 +400,13 @@ def get_vulnerability(source, return None -def find_vulnerabilities_in_cfg(cfg, vulnerability_log, definitions, lattice, trim_reassigned_in): +def find_vulnerabilities_in_cfg( + cfg, + vulnerability_log, + definitions, + lattice, + ui_mode +): """Find vulnerabilities in a cfg. Args: @@ -405,9 +414,14 @@ def find_vulnerabilities_in_cfg(cfg, vulnerability_log, definitions, lattice, tr vulnerability_log(vulnerability_log.VulnerabilityLog): The log in which to place found vulnerabilities. definitions(trigger_definitions_parser.Definitions): Source and sink definitions. lattice(Lattice): The lattice we're analysing. - trim_reassigned_in(bool): Whether or not the trim option is set. + ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. """ - triggers = identify_triggers(cfg, definitions.sources, definitions.sinks, lattice) + triggers = identify_triggers( + cfg, + definitions.sources, + definitions.sinks, + lattice + ) for sink in triggers.sinks: for source in triggers.sources: vulnerability = get_vulnerability( @@ -415,23 +429,26 @@ def find_vulnerabilities_in_cfg(cfg, vulnerability_log, definitions, lattice, tr sink, triggers, lattice, - trim_reassigned_in, - cfg + cfg, + ui_mode ) if vulnerability: vulnerability_log.append(vulnerability) -def find_vulnerabilities(cfg_list, - analysis_type, - trim_reassigned_in=False, - trigger_word_file=default_trigger_word_file): +def find_vulnerabilities( + cfg_list, + analysis_type, + trigger_word_file, + ui_mode +): """Find vulnerabilities in a list of CFGs from a trigger_word_file. Args: - cfg_list (list[CFG]): the list of CFGs to scan. - trigger_word_file (string): file containing trigger words. - Defaults to the flask trigger word file. + cfg_list(list[CFG]): the list of CFGs to scan. + analysis_type(AnalysisBase): analysis object used to create lattice. + trigger_word_file(string): file containing trigger words. + ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. Returns: A VulnerabilityLog with found vulnerabilities. @@ -445,6 +462,6 @@ def find_vulnerabilities(cfg_list, vulnerability_log, definitions, Lattice(cfg.nodes, analysis_type), - trim_reassigned_in + ui_mode ) return vulnerability_log From 57878c92a496ccdc3f3bf26c59718af6a8300891 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Sat, 3 Mar 2018 15:56:46 -0800 Subject: [PATCH 05/20] [nothing significant] changing gears to tox branch --- pyt/base_cfg.py | 24 +++++---- pyt/vulnerabilities.py | 119 ++++++++++++++++++++++++++++++++--------- 2 files changed, 109 insertions(+), 34 deletions(-) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index 8fb5ab7d..c1710b3f 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -187,21 +187,24 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num class AssignmentCallNode(AssignmentNode): """Node used for X.""" - def __init__(self, - label, - left_hand_side, - ast_node, - right_hand_side_variables, - vv_result, - *, - line_number, - path, - call_node): + def __init__( + self, + label, + left_hand_side, + ast_node, + right_hand_side_variables, + vv_result, + *, + line_number, + path, + call_node + ): """Create a X. Args: label(str): The label of the node, describing the expression it represents. left_hand_side(str): The variable on the left hand side of the assignment. Used for analysis. + ast_node right_hand_side_variables(list[str]): A list of variables on the right hand side. vv_result(list[str]): Necessary to know `image_name = image_name.replace('..', '')` is a reassignment. line_number(Optional[int]): The line of the expression the Node represents. @@ -223,6 +226,7 @@ def __init__(self, label, left_hand_side, ast_node, right_hand_side_variables, * Args: label(str): The label of the node, describing the expression it represents. restore_nodes(list[Node]): List of nodes that were restored in the function call. + ast_node right_hand_side_variables(list[str]): A list of variables on the right hand side. line_number(Optional[int]): The line of the expression the Node represents. path(string): Current filename. diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 24626bb9..82b81cd6 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -46,16 +46,24 @@ def __repr__(self): output = 'TriggerNode(' if self.trigger_word: - output = output + 'trigger_word is ' + str(self.trigger_word) + ', ' + output = '{} trigger_word is {}, '.format( + output, + self.trigger_word + ) return ( output + - 'sanitisers are ' + str(self.sanitisers) + ', ' - 'cfg_node is ' + str(self.cfg_node) + ')\n' + 'sanitisers are {}, '.format(self.sanitisers) + + 'cfg_node is {})\n'.format(self.cfg_node) ) -def identify_triggers(cfg, sources, sinks, lattice): +def identify_triggers( + cfg, + sources, + sinks, + lattice +): """Identify sources, sinks and sanitisers in a CFG. Args: @@ -83,23 +91,35 @@ def identify_triggers(cfg, sources, sinks, lattice): return Triggers(sources_in_file, sinks_in_file, sanitiser_node_dict) -def filter_cfg_nodes(cfg, cfg_node_type): +def filter_cfg_nodes( + cfg, + cfg_node_type +): return [node for node in cfg.nodes if isinstance(node, cfg_node_type)] -def find_secondary_sources(assignment_nodes, sources, lattice): +def find_secondary_sources( + assignment_nodes, + sources, + lattice +): """ Sets the secondary_nodes attribute of each source in the sources list. Args: assignment_nodes([AssignmentNode]) sources([tuple]) + lattice(Lattice): The lattice we're analysing. """ for source in sources: source.secondary_nodes = find_assignments(assignment_nodes, source, lattice) -def find_assignments(assignment_nodes, source, lattice): +def find_assignments( + assignment_nodes, + source, + lattice +): old = list() # added in order to propagate reassignments of the source node @@ -113,14 +133,24 @@ def find_assignments(assignment_nodes, source, lattice): return new -def update_assignments(assignment_list, assignment_nodes, source, lattice): +def update_assignments( + assignment_list, + assignment_nodes, + source, + lattice +): for node in assignment_nodes: for other in assignment_list: if node not in assignment_list: append_if_reassigned(assignment_list, other, node, lattice) -def append_if_reassigned(assignment_list, secondary, node, lattice): +def append_if_reassigned( + assignment_list, + secondary, + node, + lattice +): try: reassigned = False # vv_result is necessary to know `image_name = image_name.replace('..', '')` is a reassignment. @@ -138,7 +168,10 @@ def append_if_reassigned(assignment_list, secondary, node, lattice): exit(0) -def find_triggers(nodes, trigger_words): +def find_triggers( + nodes, + trigger_words +): """Find triggers from the trigger_word_list in the nodes. Args: @@ -154,7 +187,10 @@ def find_triggers(nodes, trigger_words): return trigger_nodes -def label_contains(node, trigger_words): +def label_contains( + node, + trigger_words +): """Determine if node contains any of the trigger_words provided. Args: @@ -172,7 +208,10 @@ def label_contains(node, trigger_words): yield TriggerNode(trigger_word, sanitisers, node) -def build_sanitiser_node_dict(cfg, sinks_in_file): +def build_sanitiser_node_dict( + cfg, + sinks_in_file +): """Build a dict of string -> TriggerNode pairs, where the string is the sanitiser and the TriggerNode is a TriggerNode of the sanitiser. @@ -201,7 +240,10 @@ def build_sanitiser_node_dict(cfg, sinks_in_file): return sanitiser_node_dict -def find_sanitiser_nodes(sanitiser, sanitisers_in_file): +def find_sanitiser_nodes( + sanitiser, + sanitisers_in_file +): """Find nodes containing a particular sanitiser. Args: @@ -216,15 +258,17 @@ def find_sanitiser_nodes(sanitiser, sanitisers_in_file): yield sanitiser_tuple.cfg_node -def is_sanitised(sink, sanitiser_dict, lattice): +def is_sanitised( + sink, + sanitiser_dict, + lattice +): """Check if sink is sanitised by any santiser in the sanitiser_dict. Args: sink(TriggerNode): TriggerNode of the sink. sanitiser_dict(dict): dictionary of sink sanitiser pairs. - - Returns: - True or False + lattice(Lattice): The lattice we're analysing. """ for sanitiser in sink.sanitisers: for cfg_node in sanitiser_dict[sanitiser]: @@ -237,7 +281,10 @@ class SinkArgsError(Exception): pass -def is_unknown(trimmed_reassignment_nodes, blackbox_assignments): +def is_unknown( + trimmed_reassignment_nodes, + blackbox_assignments +): """Check if vulnerability is unknown by seeing if a blackbox assignment is in trimmed_reassignment_nodes. @@ -254,7 +301,9 @@ def is_unknown(trimmed_reassignment_nodes, blackbox_assignments): return None -def get_sink_args(cfg_node): +def get_sink_args( + cfg_node +): if isinstance(cfg_node.ast_node, ast.Call): rhs_visitor = RHSVisitor() rhs_visitor.visit(cfg_node.ast_node) @@ -272,7 +321,12 @@ def get_sink_args(cfg_node): return vv.result + other_results -def get_vulnerability_chains(current_node, sink, def_use, chain): +def get_vulnerability_chains( + current_node, + sink, + def_use, + chain +): for use in def_use[current_node]: if use == sink: yield chain @@ -287,6 +341,21 @@ def get_vulnerability_chains(current_node, sink, def_use, chain): ) +def is_actually_vulnerable( + chain +): + for i in range(len(chain)): + if isinstance(chain[i], BBorBInode): + user_says = input( + 'Is the return value of {} '.format(chain[i].label) + + 'with tainted argument "{}" vulnerable? (Y/n)'.format(chain[i-1].left_hand_side) + ).lower() + print(f'The user says {user_says}') + if user_says.startswith('n'): + return False + return True + + def get_vulnerability( source, sink, @@ -341,11 +410,12 @@ def get_vulnerability( source.cfg_node, sink.cfg_node, def_use, - chain=[source.cfg_node] + [source.cfg_node] ): - for node in chain: - if isinstance(node, BBorBInode): - print(f'Is {node.label} with tainted arg ??? vulnerable?') + if is_actually_vulnerable(chain): + print('We have a vulnerability!') + + trimmed_reassignment_nodes.append(secondary_node_in_sink_args) node_in_the_vulnerability_chain = secondary_node_in_sink_args @@ -370,6 +440,7 @@ def get_vulnerability( triggers.sanitiser_dict, lattice ) + # TKTK: We do not have to call is_unknown if interactive mode is on blackbox_assignment_in_chain = is_unknown( trimmed_reassignment_nodes, cfg.blackbox_assignments From 2dcd5793d95738a5a21fd76122a7c48ce1717a53 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 5 Mar 2018 09:57:06 -0800 Subject: [PATCH 06/20] [gotta go to work] made blackbox mapping file option, refactored all namedtuples --- pyt/__main__.py | 27 ++++++++++++++++++------ pyt/argument_helpers.py | 22 +++++++++++++++++++ pyt/base_cfg.py | 29 +++++++++++++++++-------- pyt/definition_chains.py | 10 ++++++--- pyt/interprocedural_cfg.py | 8 ++++++- pyt/trigger_definitions_parser.py | 11 ++++++---- pyt/vulnerabilities.py | 35 ++++++++++++++++++++++++++----- 7 files changed, 114 insertions(+), 28 deletions(-) diff --git a/pyt/__main__.py b/pyt/__main__.py index 6f8eae95..ca0fc9d9 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -6,7 +6,13 @@ from datetime import date from pprint import pprint -from .argument_helpers import valid_date, UImode +from .argument_helpers import ( + default_blackbox_mapping_file, + default_trigger_word_file, + valid_date, + VulnerabilityFiles, + UImode +) from .ast_helper import generate_ast from .draw import draw_cfgs, draw_lattices from .constraint_table import initialize_constraint_table, print_table @@ -37,7 +43,6 @@ verbose_cfg_to_file, vulnerabilities_to_file ) -from .trigger_definitions_parser import default_trigger_word_file from .vulnerabilities import find_vulnerabilities @@ -86,6 +91,10 @@ def parse_args(args): help='Input trigger word file.', type=str, default=default_trigger_word_file) + parser.add_argument('-b', '--blackbox-mapping-file', + help='Input blackbox mapping file.', + type=str, + default=default_blackbox_mapping_file) parser.add_argument('-py2', '--python-2', help='[WARNING, EXPERIMENTAL] Turns on Python 2 mode,' + ' needed when target file(s) are written in Python 2.', action='store_true') @@ -175,8 +184,11 @@ def analyse_repo(github_repo, analysis_type, ui_mode): vulnerability_log = find_vulnerabilities( cfg_list, analysis_type, - args.trigger_word_file, - ui_mode + ui_mode, + VulnerabilityFiles( + args.trigger_word_file, + args.blackbox_mapping_file + ) ) return vulnerability_log @@ -271,8 +283,11 @@ def main(command_line_args=sys.argv[1:]): vulnerability_log = find_vulnerabilities( cfg_list, analysis, - args.trigger_word_file, - ui_mode + ui_mode, + VulnerabilityFiles( + args.trigger_word_file, + arg.blackbox_mapping_file + ) ) vulnerability_log.print_report() diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index 1381f8f3..aa319851 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -1,8 +1,21 @@ from argparse import ArgumentTypeError +from collections import namedtuple from datetime import datetime from enum import Enum +default_blackbox_mapping_file = os.path.join( + os.path.dirname(__file__) +) + + +default_trigger_word_file = os.path.join( + os.path.dirname(__file__), + 'trigger_definitions', + 'flask_trigger_words.pyt' +) + + def valid_date(s): date_format = "%Y-%m-%d" try: @@ -16,3 +29,12 @@ class UImode(Enum): NORMAL = 0 INTERACTIVE = 1 TRIM = 2 + + +VulnerabilityFiles = namedtuple( + 'VulnerabilityFiles', + [ + 'triggers', + 'blackbox_mapping' + ] +) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index c1710b3f..bda9d454 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -8,13 +8,22 @@ from .vars_visitor import VarsVisitor -ControlFlowNode = namedtuple('ControlFlowNode', - 'test last_nodes break_statements') - -ConnectStatements = namedtuple('ConnectStatements', - 'first_statement' + - ' last_statements' + - ' break_statements') +ControlFlowNode = namedtuple( + 'ControlFlowNode', + [ + 'test', + 'last_nodes', + 'break_statements' + ] +) +ConnectStatements = namedtuple( + 'ConnectStatements', + [ + 'first_statement', + 'last_statements', + 'break_statements' + ] +) CALL_IDENTIFIER = '¤' @@ -169,7 +178,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num class BBorBInode(AssignmentNode): """Node used for handling restore nodes returning from blackbox or builtin function calls.""" - def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_number, path): + def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_number, path, func_name): """Create a Restore node. Args: @@ -182,6 +191,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num super().__init__(label, left_hand_side, None, right_hand_side_variables, line_number=line_number, path=path) self.args = list() self.inner_most_call = self + self.func_name = func_name class AssignmentCallNode(AssignmentNode): @@ -835,7 +845,8 @@ def add_blackbox_or_builtin_call(self, node, blackbox): left_hand_side=LHS, right_hand_side_variables=[], line_number=node.lineno, - path=self.filenames[-1] + path=self.filenames[-1], + func_name=call_label.result[:index] ) visual_args = list() rhs_vars = list() diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index b6f9ed86..5c945d5e 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -58,10 +58,14 @@ def get_uses(node): if var not in node.left_hand_side: result.append(var) return result - elif isinstance(node, EntryOrExitNode): - return [] else: - raise + return [] + # elif isinstance(node, EntryOrExitNode): + # return [] + # else: + # print(f'\n\n\ntype(node) is {type(node)}') + # print(f'node is {node}\n\n\n') + # raise def build_def_use_chain(cfg_nodes): diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index 1be899ef..e1b9b5bc 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -35,7 +35,13 @@ from .right_hand_side_visitor import RHSVisitor -SavedVariable = namedtuple('SavedVariable', 'LHS RHS') +SavedVariable = namedtuple( + 'SavedVariable', + [ + 'LHS', + 'RHS' + ] +) BUILTINS = ( 'get', 'Flask', diff --git a/pyt/trigger_definitions_parser.py b/pyt/trigger_definitions_parser.py index 65e13f9f..222b434c 100644 --- a/pyt/trigger_definitions_parser.py +++ b/pyt/trigger_definitions_parser.py @@ -6,10 +6,13 @@ SOURCES_KEYWORD = 'sources:' SINKS_KEYWORD = 'sinks:' -Definitions = namedtuple('Definitions', 'sources sinks') -default_trigger_word_file = os.path.join(os.path.dirname(__file__), - 'trigger_definitions', - 'flask_trigger_words.pyt') +Definitions = namedtuple( + 'Definitions', + [ + 'sources', + 'sinks' + ] +) def parse_section(iterator): diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 82b81cd6..7917e3aa 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -1,6 +1,7 @@ """Module for finding vulnerabilities based on a definitions file.""" import ast +import json from collections import namedtuple from .argument_helpers import UImode @@ -14,7 +15,7 @@ from .definition_chains import build_def_use_chain from .lattice import Lattice from .right_hand_side_visitor import RHSVisitor -from .trigger_definitions_parser import default_trigger_word_file, parse +from .trigger_definitions_parser import parse from .vars_visitor import VarsVisitor from .vulnerability_log import ( SanitisedVulnerability, @@ -24,8 +25,21 @@ ) -Sanitiser = namedtuple('Sanitiser', 'trigger_word cfg_node') -Triggers = namedtuple('Triggers', 'sources sinks sanitiser_dict') +Sanitiser = namedtuple( + 'Sanitiser', + [ + 'trigger_word', + 'cfg_node' + ] +) +Triggers = namedtuple( + 'Triggers', + [ + 'sources', + 'sinks', + 'sanitiser_dict' + ] +) class TriggerNode(): @@ -352,7 +366,9 @@ def is_actually_vulnerable( ).lower() print(f'The user says {user_says}') if user_says.startswith('n'): + print(f'\n\n\n\nThe user said {chain[i].func_name} DOES NOT ret a tainted val') return False + print(f'\n\n\n\nThe user said {chain[i].func_name} DOES ret a tainted val') return True @@ -510,21 +526,26 @@ def find_vulnerabilities_in_cfg( def find_vulnerabilities( cfg_list, analysis_type, + ui_mode, trigger_word_file, - ui_mode + blackbox_mapping_file ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. Args: cfg_list(list[CFG]): the list of CFGs to scan. analysis_type(AnalysisBase): analysis object used to create lattice. - trigger_word_file(string): file containing trigger words. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + trigger_word_file(string): file containing trigger words. + blackbox_mapping_file(string): file containing whether or not a blackbox function returns a tainted value. Returns: A VulnerabilityLog with found vulnerabilities. """ + # TKTK: change to tuple^ definitions = parse(trigger_word_file) + with open(blackbox_mapping_file) as f: + blackbox_mapping = json.load(f) vulnerability_log = VulnerabilityLog() for cfg in cfg_list: @@ -535,4 +556,8 @@ def find_vulnerabilities( Lattice(cfg.nodes, analysis_type), ui_mode ) + with open(filename, 'w') as f: + json.dump(blackbox_mapping, f) + + return vulnerability_log From 7aa580b8a6c60227f37aa3eddc09dfa741f7a23c Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 5 Mar 2018 19:50:36 -0800 Subject: [PATCH 07/20] [cleanup] Moved trigger_definitions to vulnerability_definitions to have somewhere to put the blackbox mapping, DRYd the args.startdate --- example/vulnerable_code/multi_chain.py | 19 +++++++++++ pyt/__main__.py | 32 +++++++------------ pyt/argument_helpers.py | 7 ++-- pyt/trigger_definitions_parser.py | 2 +- .../blackbox_mapping.json | 9 ++++++ .../django_trigger_words.pyt | 0 .../flask_trigger_words.pyt | 0 .../test_triggers.pyt | 0 8 files changed, 46 insertions(+), 23 deletions(-) create mode 100644 example/vulnerable_code/multi_chain.py create mode 100644 pyt/vulnerability_definitions/blackbox_mapping.json rename pyt/{trigger_definitions => vulnerability_definitions}/django_trigger_words.pyt (100%) rename pyt/{trigger_definitions => vulnerability_definitions}/flask_trigger_words.pyt (100%) rename pyt/{trigger_definitions => vulnerability_definitions}/test_triggers.pyt (100%) diff --git a/example/vulnerable_code/multi_chain.py b/example/vulnerable_code/multi_chain.py new file mode 100644 index 00000000..7be9e884 --- /dev/null +++ b/example/vulnerable_code/multi_chain.py @@ -0,0 +1,19 @@ +import subprocess +from flask import Flask, render_template, request + + +app = Flask(__name__) + + +@app.route('/multi_chain', methods=['POST']) +def multi_chain(): + suggestion = request.form['suggestion'] + x = fast_eddie(suggestion, 'the') + y = x + 'foo' + z = minnesota_fats(suggestion, 'sting') + ben = graham(y, z) + + subprocess.call(ben, shell=True) + + return render_template('multi_chain.html') + diff --git a/pyt/__main__.py b/pyt/__main__.py index ca0fc9d9..e024d92a 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -171,7 +171,9 @@ def parse_args(args): search_parser.add_argument('-sd', '--start-date', help='Start date for repo search. ' - 'Criteria used is Created Date.', type=valid_date) + 'Criteria used is Created Date.', + type=valid_date, + default=date(2010, 1, 1)) return parser.parse_args(args) @@ -221,24 +223,14 @@ def main(command_line_args=sys.argv[1:]): if args.which == 'search': set_github_api_token() - if args.start_date: - scan_github( - args.search_string, - args.start_date, - analysis, - analyse_repo, - args.csv_path, - ui_mode - ) - else: - scan_github( - args.search_string, - date(2010, 1, 1), - analysis, - analyse_repo, - args.csv_path, - ui_mode - ) + scan_github( + args.search_string, + args.start_date, + analysis, + analyse_repo, + args.csv_path, + ui_mode + ) exit() path = os.path.normpath(args.filepath) @@ -286,7 +278,7 @@ def main(command_line_args=sys.argv[1:]): ui_mode, VulnerabilityFiles( args.trigger_word_file, - arg.blackbox_mapping_file + args.blackbox_mapping_file ) ) vulnerability_log.print_report() diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index aa319851..508e6ebf 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -1,3 +1,4 @@ +import os from argparse import ArgumentTypeError from collections import namedtuple from datetime import datetime @@ -5,13 +6,15 @@ default_blackbox_mapping_file = os.path.join( - os.path.dirname(__file__) + os.path.dirname(__file__), + 'vulnerability_definitions', + 'blackbox_mapping.json' ) default_trigger_word_file = os.path.join( os.path.dirname(__file__), - 'trigger_definitions', + 'vulnerability_definitions', 'flask_trigger_words.pyt' ) diff --git a/pyt/trigger_definitions_parser.py b/pyt/trigger_definitions_parser.py index 222b434c..248be29a 100644 --- a/pyt/trigger_definitions_parser.py +++ b/pyt/trigger_definitions_parser.py @@ -40,7 +40,7 @@ def parse_section(iterator): return -def parse(trigger_word_file=default_trigger_word_file): +def parse(trigger_word_file): """Parse the file for source and sink definitions. Returns: diff --git a/pyt/vulnerability_definitions/blackbox_mapping.json b/pyt/vulnerability_definitions/blackbox_mapping.json new file mode 100644 index 00000000..402cab38 --- /dev/null +++ b/pyt/vulnerability_definitions/blackbox_mapping.json @@ -0,0 +1,9 @@ +{ + "does_not_propagate": [ + "fast_eddie" + ], + "propagates": [ + "minnesota_fats", + "graham" + ] +} \ No newline at end of file diff --git a/pyt/trigger_definitions/django_trigger_words.pyt b/pyt/vulnerability_definitions/django_trigger_words.pyt similarity index 100% rename from pyt/trigger_definitions/django_trigger_words.pyt rename to pyt/vulnerability_definitions/django_trigger_words.pyt diff --git a/pyt/trigger_definitions/flask_trigger_words.pyt b/pyt/vulnerability_definitions/flask_trigger_words.pyt similarity index 100% rename from pyt/trigger_definitions/flask_trigger_words.pyt rename to pyt/vulnerability_definitions/flask_trigger_words.pyt diff --git a/pyt/trigger_definitions/test_triggers.pyt b/pyt/vulnerability_definitions/test_triggers.pyt similarity index 100% rename from pyt/trigger_definitions/test_triggers.pyt rename to pyt/vulnerability_definitions/test_triggers.pyt From e51073902aa650da5fcd4c6d391cc2be2549dbd9 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 5 Mar 2018 19:52:15 -0800 Subject: [PATCH 08/20] [blackbox stuff] start to check and save the mapping, still need to figure out UI --- pyt/vulnerabilities.py | 72 +++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 7917e3aa..9bc19b93 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -356,19 +356,29 @@ def get_vulnerability_chains( def is_actually_vulnerable( - chain + chain, + blackbox_mapping ): for i in range(len(chain)): if isinstance(chain[i], BBorBInode): - user_says = input( - 'Is the return value of {} '.format(chain[i].label) + - 'with tainted argument "{}" vulnerable? (Y/n)'.format(chain[i-1].left_hand_side) - ).lower() - print(f'The user says {user_says}') - if user_says.startswith('n'): - print(f'\n\n\n\nThe user said {chain[i].func_name} DOES NOT ret a tainted val') + if chain[i].func_name in blackbox_mapping['propagates']: + print(f'so {chain[i].func_name} does propagate') + elif chain[i].func_name in blackbox_mapping['does_not_propagate']: + print(f'so {chain[i].func_name} does not propagate') return False - print(f'\n\n\n\nThe user said {chain[i].func_name} DOES ret a tainted val') + else: + # TKTK: move if interactive mode all the way up here? + user_says = input( + 'Is the return value of {} '.format(chain[i].label) + + 'with tainted argument "{}" vulnerable? (Y/n)'.format(chain[i-1].left_hand_side) + ).lower() + print(f'The user says {user_says}') + if user_says.startswith('n'): + print(f'\n\n\n\nThe user said {chain[i].func_name} DOES NOT ret a tainted val') + blackbox_mapping['does_not_propagate'].append(chain[i].func_name) + return False + blackbox_mapping['propagates'].append(chain[i].func_name) + print(f'\n\n\n\nThe user said {chain[i].func_name} DOES ret a tainted val') return True @@ -378,7 +388,8 @@ def get_vulnerability( triggers, lattice, cfg, - ui_mode + ui_mode, + blackbox_mapping ): """Get vulnerability between source and sink if it exists. @@ -391,6 +402,7 @@ def get_vulnerability( lattice(Lattice): The lattice we're analysing. cfg(CFG): .blackbox_assignments used in is_unknown, .nodes used in build_def_use_chain ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + blackbox_mapping(dict): TODO Returns: A Vulnerability if it exists, else None @@ -428,8 +440,13 @@ def get_vulnerability( def_use, [source.cfg_node] ): - if is_actually_vulnerable(chain): - print('We have a vulnerability!') + if is_actually_vulnerable(chain, blackbox_mapping): + # print(f'\n\n\n\nWe have a vulnerability {chain}!') + print(f'\n\n\n\nWe have a vulnerability !') + else: + # print(f'\n\n\n\nWe DO NOT have a vulnerability {chain}!') + print(f'\n\n\n\nWe DO NOT have a vulnerability !') + @@ -471,6 +488,9 @@ def get_vulnerability( sink.sanitisers, reassignment_nodes ) + elif ui_mode == UImode.INTERACTIVE: + print('figure stuff out') + print('figure stuff out') elif blackbox_assignment_in_chain: return UnknownVulnerability( source.cfg_node, source_trigger_word, @@ -492,7 +512,8 @@ def find_vulnerabilities_in_cfg( vulnerability_log, definitions, lattice, - ui_mode + ui_mode, + blackbox_mapping ): """Find vulnerabilities in a cfg. @@ -502,6 +523,7 @@ def find_vulnerabilities_in_cfg( definitions(trigger_definitions_parser.Definitions): Source and sink definitions. lattice(Lattice): The lattice we're analysing. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. + blackbox_mapping(dict): TODO """ triggers = identify_triggers( cfg, @@ -517,7 +539,8 @@ def find_vulnerabilities_in_cfg( triggers, lattice, cfg, - ui_mode + ui_mode, + blackbox_mapping ) if vulnerability: vulnerability_log.append(vulnerability) @@ -527,8 +550,7 @@ def find_vulnerabilities( cfg_list, analysis_type, ui_mode, - trigger_word_file, - blackbox_mapping_file + vulnerability_files ): """Find vulnerabilities in a list of CFGs from a trigger_word_file. @@ -536,16 +558,15 @@ def find_vulnerabilities( cfg_list(list[CFG]): the list of CFGs to scan. analysis_type(AnalysisBase): analysis object used to create lattice. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. - trigger_word_file(string): file containing trigger words. - blackbox_mapping_file(string): file containing whether or not a blackbox function returns a tainted value. + vulnerability_files(VulnerabilityFiles): contains trigger words and blackbox_mapping files Returns: A VulnerabilityLog with found vulnerabilities. """ - # TKTK: change to tuple^ - definitions = parse(trigger_word_file) - with open(blackbox_mapping_file) as f: + definitions = parse(vulnerability_files.triggers) + with open(vulnerability_files.blackbox_mapping) as f: blackbox_mapping = json.load(f) + print(f'BEFORE blackbox_mapping is {blackbox_mapping}') vulnerability_log = VulnerabilityLog() for cfg in cfg_list: @@ -554,10 +575,11 @@ def find_vulnerabilities( vulnerability_log, definitions, Lattice(cfg.nodes, analysis_type), - ui_mode + ui_mode, + blackbox_mapping ) - with open(filename, 'w') as f: - json.dump(blackbox_mapping, f) - + print(f'AFTER blackbox_mapping is {blackbox_mapping}') + with open(vulnerability_files.blackbox_mapping, 'w') as f: + json.dump(blackbox_mapping, f, indent=4) return vulnerability_log From b2bc7198841aae88125f43689c2933e16e911771 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 15 Mar 2018 16:10:02 -0700 Subject: [PATCH 09/20] Some huge vulnerabilities.py changes and cleanup, including a vuln_factory + alphabetize enums, make all end line comments have 2 spaces not 1, add AssignmentCallNode docstrings and DRYness, cleanup dead SinkArgsError code, rename vulnerability_log to vulnerability_helper, refactor all [0:..] to [:..] --- pyt/argument_helpers.py | 4 +- pyt/base_cfg.py | 62 ++-- pyt/github_search.py | 3 - pyt/repo_runner.py | 2 +- pyt/vulnerabilities.py | 271 ++++++++---------- .../blackbox_mapping.json | 7 +- ...ability_log.py => vulnerability_helper.py} | 111 +++++-- 7 files changed, 231 insertions(+), 229 deletions(-) rename pyt/{vulnerability_log.py => vulnerability_helper.py} (56%) diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index 508e6ebf..7f4aabdd 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -29,8 +29,8 @@ def valid_date(s): class UImode(Enum): - NORMAL = 0 - INTERACTIVE = 1 + INTERACTIVE = 0 + NORMAL = 1 TRIM = 2 diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index bda9d454..35305a17 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -195,7 +195,7 @@ def __init__(self, label, left_hand_side, right_hand_side_variables, *, line_num class AssignmentCallNode(AssignmentNode): - """Node used for X.""" + """Node used for when a call happens inside of an assignment.""" def __init__( self, @@ -209,7 +209,7 @@ def __init__( path, call_node ): - """Create a X. + """Create an Assignment Call node. Args: label(str): The label of the node, describing the expression it represents. @@ -400,7 +400,7 @@ def stmt_star_handler(self, stmts, prev_node_to_avoid=None): if node and not first_node: # (Make sure first_node isn't already set.) # first_node is always a "node_to_connect", because it won't have ingoing otherwise # If we have e.g. - # import os # An ignored node + # import os # An ignored node # value = None # first_node will be `value = None` if hasattr(node, 'ingoing'): @@ -491,7 +491,7 @@ def visit_If(self, node): orelse_last_nodes = self.handle_or_else(node.orelse, test) body_connect_stmts.last_statements.extend(orelse_last_nodes) else: - body_connect_stmts.last_statements.append(test) # if there is no orelse, test needs an edge to the next_node + body_connect_stmts.last_statements.append(test) # if there is no orelse, test needs an edge to the next_node last_statements = self.remove_breaks(body_connect_stmts.last_statements) @@ -575,7 +575,7 @@ def extract_left_hand_side(self, target): left_hand_side.replace('*', '') if '[' in left_hand_side: index = left_hand_side.index('[') - left_hand_side = target[0:index] + left_hand_side = target[:index] return left_hand_side @@ -607,7 +607,7 @@ def assign_tuple_target(self, node, right_hand_side_variables): self.connect_nodes(new_assignment_nodes) - return ControlFlowNode(new_assignment_nodes[0], [new_assignment_nodes[-1]], []) # return the last added node + return ControlFlowNode(new_assignment_nodes[0], [new_assignment_nodes[-1]], []) # return the last added node def assign_multi_target(self, node, right_hand_side_variables): new_assignment_nodes = list() @@ -627,12 +627,12 @@ def assign_multi_target(self, node, right_hand_side_variables): ))) self.connect_nodes(new_assignment_nodes) - return ControlFlowNode(new_assignment_nodes[0], [new_assignment_nodes[-1]], []) # return the last added node + return ControlFlowNode(new_assignment_nodes[0], [new_assignment_nodes[-1]], []) # return the last added node def visit_Assign(self, node): rhs_visitor = RHSVisitor() rhs_visitor.visit(node.value) - if isinstance(node.targets[0], ast.Tuple): # x,y = [1,2] + if isinstance(node.targets[0], ast.Tuple): # x,y = [1,2] if isinstance(node.value, ast.Tuple): return self.assign_tuple_target(node, rhs_visitor.result) elif isinstance(node.value, ast.Call): @@ -678,40 +678,30 @@ def visit_Assign(self, node): def assignment_call_node(self, left_hand_label, ast_node): """Handle assignments that contain a function call on its right side.""" - self.undecided = True # Used for handling functions in assignments + self.undecided = True # Used for handling functions in assignments call = self.visit(ast_node.value) - call_label = '' - call_assignment = None - - # Necessary to know `image_name = image_name.replace('..', '')` is a reassignment. - vars_visitor = VarsVisitor() - vars_visitor.visit(ast_node.value) - call_label = call.left_hand_side + if isinstance(call, BBorBInode): - call_assignment = AssignmentCallNode( - left_hand_label + ' = ' + call_label, - left_hand_label, - ast_node, - [call.left_hand_side], - vv_result=vars_visitor.result, - line_number=ast_node.lineno, - path=self.filenames[-1], - call_node=call - ) + # Necessary to know `image_name = image_name.replace('..', '')` is a reassignment. + vars_visitor = VarsVisitor() + vars_visitor.visit(ast_node.value) + vv_result = vars_visitor.result # Assignment after returned user-defined function call e.g. RestoreNode ¤call_1 = ret_outer elif isinstance(call, AssignmentNode): - call_assignment = AssignmentCallNode( - left_hand_label + ' = ' + call_label, - left_hand_label, - ast_node, - [call.left_hand_side], - vv_result=[], - line_number=ast_node.lineno, - path=self.filenames[-1], - call_node=call - ) + vv_result = list() + + call_assignment = AssignmentCallNode( + left_hand_label + ' = ' + call_label, + left_hand_label, + ast_node, + [call.left_hand_side], + vv_result=vv_result, + line_number=ast_node.lineno, + path=self.filenames[-1], + call_node=call + ) call.connect(call_assignment) self.nodes.append(call_assignment) diff --git a/pyt/github_search.py b/pyt/github_search.py index 154242cd..0c54ccbe 100644 --- a/pyt/github_search.py +++ b/pyt/github_search.py @@ -8,7 +8,6 @@ from .reaching_definitions_taint import ReachingDefinitionsTaintAnalysis from .repo_runner import add_repo_to_csv, NoEntryPathError from .save import save_repo_scan -from .vulnerabilities import SinkArgsError DEFAULT_TIMEOUT_IN_SECONDS = 60 @@ -228,8 +227,6 @@ def scan_github(search_string, start_date, analysis_type, analyse_repo_func, csv else: save_repo_scan(repo, r.path, vulnerability_log=None) r.clean_up() - except SinkArgsError as err: - save_repo_scan(repo, r.path, vulnerability_log=None, error=err) except SyntaxError as err: save_repo_scan(repo, r.path, vulnerability_log=None, error=err) except IOError as err: diff --git a/pyt/repo_runner.py b/pyt/repo_runner.py index 78ad414b..9af8acfe 100644 --- a/pyt/repo_runner.py +++ b/pyt/repo_runner.py @@ -27,7 +27,7 @@ def clone(self): r = self.URL.split('/')[-1].split('.') if len(r) > 1: - self.directory = '.'.join(r[0:-1]) + self.directory = '.'.join(r[:-1]) else: self.directory = r[0] diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 75f5649d..eace2c07 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -17,11 +17,10 @@ from .right_hand_side_visitor import RHSVisitor from .trigger_definitions_parser import parse from .vars_visitor import VarsVisitor -from .vulnerability_log import ( - SanitisedVulnerability, - UnknownVulnerability, - Vulnerability, - VulnerabilityLog +from .vulnerability_helper import ( + vuln_factory, + VulnerabilityLog, + VulnerabilityType ) @@ -272,67 +271,19 @@ def find_sanitiser_nodes( yield sanitiser_tuple.cfg_node -def is_sanitised( - sink, - sanitiser_dict, - lattice -): - """Check if sink is sanitised by any santiser in the sanitiser_dict. - - Args: - sink(TriggerNode): TriggerNode of the sink. - sanitiser_dict(dict): dictionary of sink sanitiser pairs. - lattice(Lattice): The lattice we're analysing. - """ - for sanitiser in sink.sanitisers: - for cfg_node in sanitiser_dict[sanitiser]: - if lattice.in_constraint(cfg_node, sink.cfg_node): - return True - return False - - -class SinkArgsError(Exception): - pass - - -def is_unknown( - trimmed_reassignment_nodes, - blackbox_assignments -): - """Check if vulnerability is unknown by seeing if a blackbox - assignment is in trimmed_reassignment_nodes. - - Args: - trimmed_reassignment_nodes(list[AssignmentNode]): reassignments leading to the vulnerability. - blackbox_assignments(set[AssignmentNode]): set of blackbox assignments. - - Returns: - AssignmentNode or None - """ - for node in trimmed_reassignment_nodes: - if node in blackbox_assignments: - return node - return None - - -def get_sink_args( - cfg_node -): +def get_sink_args(cfg_node): if isinstance(cfg_node.ast_node, ast.Call): rhs_visitor = RHSVisitor() rhs_visitor.visit(cfg_node.ast_node) return rhs_visitor.result elif isinstance(cfg_node.ast_node, ast.Assign): return cfg_node.right_hand_side_variables - - vv = VarsVisitor() - other_results = list() - if isinstance(cfg_node, BBorBInode): - other_results = cfg_node.args + elif isinstance(cfg_node, BBorBInode): + return cfg_node.args else: + vv = VarsVisitor() vv.visit(cfg_node.ast_node) - - return vv.result + other_results + return vv.result def get_vulnerability_chains( @@ -341,6 +292,14 @@ def get_vulnerability_chains( def_use, chain ): + """Traverses the def-use graph to find all paths from source to sink that cause a vulnerability. + + Args: + current_node() + sink() + def_use(dict): + chain(list(Node)): + """ for use in def_use[current_node]: if use == sink: yield chain @@ -355,31 +314,64 @@ def get_vulnerability_chains( ) -def is_actually_vulnerable( +def how_vulnerable( chain, - blackbox_mapping + blackbox_mapping, + sanitiser_nodes, + blackbox_assignments, + ui_mode, + vuln_deets ): - for i in range(len(chain)): - if isinstance(chain[i], BBorBInode): - if chain[i].func_name in blackbox_mapping['propagates']: - print(f'so {chain[i].func_name} does propagate') - elif chain[i].func_name in blackbox_mapping['does_not_propagate']: - print(f'so {chain[i].func_name} does not propagate') - return False - else: - # TKTK: move if interactive mode all the way up here? + """Iterates through the chain of nodes and checks the blackbox nodes against the blackbox mapping and sanitiser dictionary. + + Args: + chain(list(Node)): TODO + blackbox_mapping(dict): + sanitiser_nodes(set): + blackbox_assignments(set[AssignmentNode]): set of blackbox assignments, includes the ReturnNode's of BBorBInode's. + ui_mode(UImode): determines if we interact with the user when we don't already have a blackbox mapping available. + vuln_deets(dict): vulnerability details. + + Returns: + A VulnerabilityType depending on how vulnerable the chain is. + """ + for i, current_node in enumerate(chain): + if current_node in sanitiser_nodes: + vuln_deets['sanitiser'] = current_node + return Vulnerability.SANITISED + + if isinstance(current_node, BBorBInode): + if current_node.func_name in blackbox_mapping['propagates']: + continue + elif current_node.func_name in blackbox_mapping['does_not_propagate']: + return VulnerabilityType.FALSE + elif ui_mode == UImode.INTERACTIVE: user_says = input( - 'Is the return value of {} '.format(chain[i].label) + - 'with tainted argument "{}" vulnerable? (Y/n)'.format(chain[i-1].left_hand_side) + 'Is the return value of {} with tainted argument "{}" vulnerable? (Y/n)'.format( + current_node.label, + chain[i-1].left_hand_side + ) ).lower() - print(f'The user says {user_says}') if user_says.startswith('n'): - print(f'\n\n\n\nThe user said {chain[i].func_name} DOES NOT ret a tainted val') - blackbox_mapping['does_not_propagate'].append(chain[i].func_name) - return False - blackbox_mapping['propagates'].append(chain[i].func_name) - print(f'\n\n\n\nThe user said {chain[i].func_name} DOES ret a tainted val') - return True + blackbox_mapping['does_not_propagate'].append(current_node.func_name) + return VulnerabilityType.FALSE + blackbox_mapping['propagates'].append(current_node.func_name) + else: + vuln_deets['unknown_assignment'] = current_node + return VulnerabilityType.UNKNOWN + return VulnerabilityType.TRUE + + +def get_tainted_node_in_sink_args( + sink_args, + nodes_in_constaint +): + if not sink_args: + return None + # Starts with the node closest to the sink + for node in nodes_in_constaint: + if node.left_hand_side in sink_args: + return node def get_vulnerability( @@ -411,83 +403,58 @@ def get_vulnerability( Returns: A Vulnerability if it exists, else None """ - secondary_nodes_in_sink = [secondary for secondary in source.secondary_nodes - if lattice.in_constraint(secondary, - sink.cfg_node)] + nodes_in_constaint = [secondary for secondary in reversed(source.secondary_nodes) + if lattice.in_constraint(secondary, + sink.cfg_node)] + nodes_in_constaint.append(source.cfg_node) + sink_args = get_sink_args(sink.cfg_node) - tainted_node_in_sink_arg = None - if sink_args: - if source.cfg_node.left_hand_side in sink_args: - tainted_node_in_sink_arg = source.cfg_node - for node in secondary_nodes_in_sink: - if node.left_hand_side in sink_args: - tainted_node_in_sink_arg = node + tainted_node_in_sink_arg = get_tainted_node_in_sink_args( + sink_args, + nodes_in_constaint + ) if tainted_node_in_sink_arg: - if ui_mode == UImode.INTERACTIVE: - def_use = build_def_use_chain(cfg.nodes) - for chain in get_vulnerability_chains( - source.cfg_node, - sink.cfg_node, - def_use, - [source.cfg_node] - ): - if is_actually_vulnerable(chain, blackbox_mapping): - # print(f'\n\n\n\nWe have a vulnerability {chain}!') - print(f'\n\n\n\nWe have a vulnerability !') - else: - # print(f'\n\n\n\nWe DO NOT have a vulnerability {chain}!') - print(f'\n\n\n\nWe DO NOT have a vulnerability !') - - trimmed_reassignment_nodes = list() - trimmed_reassignment_nodes.append(tainted_node_in_sink_arg) - node_in_the_vulnerability_chain = tainted_node_in_sink_arg - # Here is where we do backwards slicing to traceback which nodes led to the vulnerability - for secondary in reversed(source.secondary_nodes): - if lattice.in_constraint(secondary, sink.cfg_node): - if secondary.left_hand_side in node_in_the_vulnerability_chain.right_hand_side_variables: - node_in_the_vulnerability_chain = secondary - trimmed_reassignment_nodes.insert(0, node_in_the_vulnerability_chain) - - source_trigger_word = source.trigger_word - sink_trigger_word = sink.trigger_word - - sink_is_sanitised = is_sanitised( - sink, - triggers.sanitiser_dict, - lattice - ) - # TKTK: We do not have to call is_unknown if interactive mode is on - blackbox_assignment_in_chain = is_unknown( - trimmed_reassignment_nodes, - cfg.blackbox_assignments - ) - reassignment_nodes = source.secondary_nodes - if ui_mode == UImode.TRIM: - reassignment_nodes = trimmed_reassignment_nodes - if sink_is_sanitised: - return SanitisedVulnerability( - source.cfg_node, source_trigger_word, - sink.cfg_node, sink_trigger_word, - sink.sanitisers, - reassignment_nodes - ) - elif ui_mode == UImode.INTERACTIVE: - print('figure stuff out') - print('figure stuff out') - elif blackbox_assignment_in_chain: - return UnknownVulnerability( - source.cfg_node, source_trigger_word, - sink.cfg_node, sink_trigger_word, - blackbox_assignment_in_chain, - reassignment_nodes - ) - else: - return Vulnerability( - source.cfg_node, source_trigger_word, - sink.cfg_node, sink_trigger_word, - reassignment_nodes + vuln_deets = { + 'source': source.cfg_node, + 'source_trigger_word': source.trigger_word, + 'sink': sink.cfg_node, + 'sink_trigger_word': sink.trigger_word, + 'reassignment_nodes': source.secondary_nodes + } + + # Imma get this working and then see if there is a better way + # Maybe using blackbox_mapping.json, maybe not. + sanitiser_nodes = set() + if sink.sanitisers: + print(f'so sink.sanitisers is {sink.sanitisers}') + for sanitiser in sink.sanitisers: + print(f'so triggers.sanitiser_dict[sanitiser] is {triggers.sanitiser_dict[sanitiser]}') + for cfg_node in triggers.sanitiser_dict[sanitiser]: + sanitiser_nodes.append(cfg_node) + + def_use = build_def_use_chain(cfg.nodes) + for chain in get_vulnerability_chains( + source.cfg_node, + sink.cfg_node, + def_use, + [source.cfg_node] + ): + vulnerability_type = how_vulnerable( + chain, + blackbox_mapping, + sanitiser_nodes, + cfg.blackbox_assignments, + ui_mode, + vuln_deets ) + if vulnerability_type == VulnerabilityType.FALSE: + continue + if ui_mode != UImode.NORMAL: + vuln_deets['reassignment_nodes'] = chain + + return vuln_factory(vulnerability_type)(**vuln_deets) + return None @@ -550,7 +517,6 @@ def find_vulnerabilities( definitions = parse(vulnerability_files.triggers) with open(vulnerability_files.blackbox_mapping) as f: blackbox_mapping = json.load(f) - print(f'BEFORE blackbox_mapping is {blackbox_mapping}') vulnerability_log = VulnerabilityLog() for cfg in cfg_list: @@ -562,7 +528,6 @@ def find_vulnerabilities( ui_mode, blackbox_mapping ) - print(f'AFTER blackbox_mapping is {blackbox_mapping}') with open(vulnerability_files.blackbox_mapping, 'w') as f: json.dump(blackbox_mapping, f, indent=4) diff --git a/pyt/vulnerability_definitions/blackbox_mapping.json b/pyt/vulnerability_definitions/blackbox_mapping.json index 402cab38..96c749fd 100644 --- a/pyt/vulnerability_definitions/blackbox_mapping.json +++ b/pyt/vulnerability_definitions/blackbox_mapping.json @@ -1,9 +1,10 @@ { "does_not_propagate": [ - "fast_eddie" + "fast_eddie", + "url_for" ], "propagates": [ - "minnesota_fats", - "graham" + "graham", + "minnesota_fats" ] } \ No newline at end of file diff --git a/pyt/vulnerability_log.py b/pyt/vulnerability_helper.py similarity index 56% rename from pyt/vulnerability_log.py rename to pyt/vulnerability_helper.py index d74fd653..f5208757 100644 --- a/pyt/vulnerability_log.py +++ b/pyt/vulnerability_helper.py @@ -1,9 +1,12 @@ -"""This module contains a vulnerability log. +"""This module contains vulnerability helpers. -This log is able to give precise information +Mostly is contains logs to give precise information about where a vulnerability is located. The log is printed to standard output. + +It is only used in vulnerabilities.py """ +from enum import Enum class VulnerabilityLog(): @@ -40,7 +43,8 @@ def __str__(self): reassignment += '\n\t'.join([ 'File: ' + node.path + '\n\t' + ' > Line ' + str(node.line_number) + ': ' + - node.label for node in self.reassignment_nodes]) + node.label for node in self.reassignment_nodes + ]) return reassignment @@ -48,34 +52,45 @@ class Vulnerability(): """Vulnerability containing the source and the sources trigger word, the sink and the sinks trigger word.""" - def __init__(self, source, source_trigger_word, - sink, sink_trigger_word, reassignment_nodes): + def __init__( + self, + source, + source_trigger_word, + sink, + sink_trigger_word, + reassignment_nodes + ): """Set source and sink information.""" self.source = source self.source_trigger_word = source_trigger_word self.sink = sink self.sink_trigger_word = sink_trigger_word - self.reassignment_nodes = reassignment_nodes - self.__remove_sink_from_secondary_nodes() + self.reassignment_nodes = reassignment_nodes + self._remove_sink_from_secondary_nodes() - def __remove_sink_from_secondary_nodes(self): - if self.reassignment_nodes: - try: - self.reassignment_nodes.remove(self.sink) - except ValueError: - pass + def _remove_sink_from_secondary_nodes(self): + try: + self.reassignment_nodes.remove(self.sink) + except ValueError: + pass def __str__(self): """Pretty printing of a vulnerability.""" reassigned_str = Reassigned(self.reassignment_nodes) - return ('File: {}\n > User input at line {}, trigger word "{}":' - ' \n\t{}{}\nFile: {}\n > reaches line {}, trigger word' - ' "{}": \n\t{}'.format( - self.source.path, self.source.line_number, - self.source_trigger_word, self.source.label, - reassigned_str, self.sink.path, self.sink.line_number, - self.sink_trigger_word, self.sink.label)) + return ( + 'File: {}\n' + ' > User input at line {}, trigger word "{}": \n' + '\t {}{}\nFile: {}\n' + ' > reaches line {}, trigger word "{}": \n' + '\t{}'.format( + self.source.path, + self.source.line_number, self.source_trigger_word, + self.source.label, reassigned_str, self.sink.path, + self.sink.line_number, self.sink_trigger_word, + self.sink.label + ) + ) class SanitisedVulnerability(Vulnerability): @@ -83,8 +98,15 @@ class SanitisedVulnerability(Vulnerability): trigger word, the sink and the sinks trigger word. Also containing the sanitiser.""" - def __init__(self, source, source_trigger_word, - sink, sink_trigger_word, sanitiser, reassignment_nodes): + def __init__( + self, + source, + source_trigger_word, + sink, + sink_trigger_word, + sanitiser, + reassignment_nodes + ): """Set source, sink and sanitiser information.""" super().__init__(source, source_trigger_word, sink, sink_trigger_word, reassignment_nodes) @@ -92,9 +114,11 @@ def __init__(self, source, source_trigger_word, def __str__(self): """Pretty printing of a vulnerability.""" - super_str = super().__str__() - return super_str + ('\nThis vulnerability is potentially sanitised by:' - ' {}'.format(self.sanitiser)) + return ( + super().__str__() + + '\nThis vulnerability is potentially sanitised by: ' + + self.sanitiser + ) class UnknownVulnerability(Vulnerability): @@ -102,15 +126,40 @@ class UnknownVulnerability(Vulnerability): trigger word, the sink and the sinks trigger word. Also containing the blackbox assignment.""" - def __init__(self, source, source_trigger_word, - sink, sink_trigger_word, blackbox_assignment, reassignment_nodes): + def __init__( + self, + source, + source_trigger_word, + sink, + sink_trigger_word, + unknown_assignment, + reassignment_nodes + ): """Set source, sink and blackbox assignment information.""" super().__init__(source, source_trigger_word, sink, sink_trigger_word, reassignment_nodes) - self.blackbox_assignment = blackbox_assignment + self.unknown_assignment = unknown_assignment def __str__(self): """Pretty printing of a vulnerability.""" - super_str = super().__str__() - return super_str + ('\nThis vulnerability is unknown due to:' - ' {}'.format(self.blackbox_assignment)) + return ( + super().__str__() + + '\nThis vulnerability is unknown due to: ' + + str(self.unknown_assignment) + ) + + +class VulnerabilityType(Enum): + FALSE = 0 + SANITISED = 1 + TRUE = 2 + UNKNOWN = 3 + + +def vuln_factory(vulnerability_type): + if vulnerability_type == VulnerabilityType.UNKNOWN: + return UnknownVulnerability + elif vulnerability_type == VulnerabilityType.SANITISED: + return SanitisedVulnerability + else: + return Vulnerability From e5d0b2c238097a0b44b0fb284ba763676c7f988d Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 15 Mar 2018 21:04:44 -0700 Subject: [PATCH 10/20] Disinfected all but 3 tests, Scrubbed ugly special case code for VarsVisiting during an AssignmentCallNode, Rinsed build_def_use_chain, Wiped get_uses --- example/vulnerable_code/path_traversal.py | 2 +- pyt/__main__.py | 8 +- pyt/argument_helpers.py | 8 +- pyt/base_cfg.py | 22 ++-- pyt/definition_chains.py | 43 +++---- pyt/interprocedural_cfg.py | 11 +- pyt/reaching_definitions_taint.py | 15 +-- pyt/vulnerabilities.py | 45 +++---- pyt/vulnerability_helper.py | 32 +++-- tests/command_line_test.py | 8 +- tests/vulnerabilities_across_files_test.py | 16 ++- tests/vulnerabilities_test.py | 130 ++++++++++++++------- 12 files changed, 192 insertions(+), 148 deletions(-) diff --git a/example/vulnerable_code/path_traversal.py b/example/vulnerable_code/path_traversal.py index cb350fb3..7c80e2c0 100644 --- a/example/vulnerable_code/path_traversal.py +++ b/example/vulnerable_code/path_traversal.py @@ -16,7 +16,7 @@ def cat_picture(): if not image_name: image_name = 'foo' return 404 - foo = outer(inner(), image_name) # Nested call after if caused the problem + foo = outer(inner(), image_name) # Nested call after if caused the problem send_file(foo) return 'idk' diff --git a/pyt/__main__.py b/pyt/__main__.py index e024d92a..51bd1b7a 100644 --- a/pyt/__main__.py +++ b/pyt/__main__.py @@ -188,8 +188,8 @@ def analyse_repo(github_repo, analysis_type, ui_mode): analysis_type, ui_mode, VulnerabilityFiles( - args.trigger_word_file, - args.blackbox_mapping_file + args.blackbox_mapping_file, + args.trigger_word_file ) ) return vulnerability_log @@ -277,8 +277,8 @@ def main(command_line_args=sys.argv[1:]): analysis, ui_mode, VulnerabilityFiles( - args.trigger_word_file, - args.blackbox_mapping_file + args.blackbox_mapping_file, + args.trigger_word_file ) ) vulnerability_log.print_report() diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index 7f4aabdd..a020431e 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -36,8 +36,8 @@ class UImode(Enum): VulnerabilityFiles = namedtuple( 'VulnerabilityFiles', - [ - 'triggers', - 'blackbox_mapping' - ] + [ + 'blackbox_mapping', + 'triggers' + ] ) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index 35305a17..99bca7ce 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -203,7 +203,6 @@ def __init__( left_hand_side, ast_node, right_hand_side_variables, - vv_result, *, line_number, path, @@ -216,13 +215,11 @@ def __init__( left_hand_side(str): The variable on the left hand side of the assignment. Used for analysis. ast_node right_hand_side_variables(list[str]): A list of variables on the right hand side. - vv_result(list[str]): Necessary to know `image_name = image_name.replace('..', '')` is a reassignment. line_number(Optional[int]): The line of the expression the Node represents. path(string): Current filename. call_node(BBorBInode or RestoreNode): Used in connect_control_flow_node. """ super().__init__(label, left_hand_side, ast_node, right_hand_side_variables, line_number=line_number, path=path) - self.vv_result = vv_result self.call_node = call_node self.blackbox = False @@ -314,8 +311,7 @@ def should_connect_node(self, node): """Determine if node should be in the final CFG.""" if isinstance(node, (FunctionNode, IgnoredNode)): return False - else: - return True + return True def get_inner_most_function_call(self, call_node): # Loop to inner most function call @@ -684,20 +680,18 @@ def assignment_call_node(self, left_hand_label, ast_node): call_label = call.left_hand_side if isinstance(call, BBorBInode): - # Necessary to know `image_name = image_name.replace('..', '')` is a reassignment. + # Necessary to know e.g. + # `image_name = image_name.replace('..', '')` + # is a reassignment. vars_visitor = VarsVisitor() vars_visitor.visit(ast_node.value) - vv_result = vars_visitor.result - # Assignment after returned user-defined function call e.g. RestoreNode ¤call_1 = ret_outer - elif isinstance(call, AssignmentNode): - vv_result = list() + call.right_hand_side_variables.extend(vars_visitor.result) call_assignment = AssignmentCallNode( left_hand_label + ' = ' + call_label, left_hand_label, ast_node, [call.left_hand_side], - vv_result=vv_result, line_number=ast_node.lineno, path=self.filenames[-1], call_node=call @@ -886,8 +880,10 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_node.label = LHS + " = " + RHS call_node.right_hand_side_variables = rhs_vars - # Used in get_sink_args - call_node.args = rhs_vars + # Used in get_sink_args, not using right_hand_side_variables because it is extended in AssignmentCallNode + rhs_visitor = RHSVisitor() + rhs_visitor.visit(node) + call_node.args = rhs_visitor.result if blackbox: self.blackbox_assignments.add(call_node) diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index b17af9d2..e61c3c01 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -1,6 +1,11 @@ import ast -from .base_cfg import AssignmentNode, EntryOrExitNode +from .base_cfg import ( + AssignmentNode, + AssignmentCallNode, + BBorBInode, + EntryOrExitNode +) from .constraint_table import constraint_table from .lattice import Lattice from .reaching_definitions import ReachingDefinitionsAnalysis @@ -52,38 +57,24 @@ def build_use_def_chain(cfg_nodes): return use_def -def get_uses(node): - if isinstance(node, AssignmentNode): - result = list() - for var in node.right_hand_side_variables: - if var not in node.left_hand_side: - result.append(var) - return result - else: - return [] - # elif isinstance(node, EntryOrExitNode): - # return [] - # else: - # print(f'\n\n\ntype(node) is {type(node)}') - # print(f'node is {node}\n\n\n') - # raise - - def build_def_use_chain(cfg_nodes): def_use = dict() lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) # For every node for node in cfg_nodes: - # Make an empty list for each def + # That's a definition if isinstance(node, AssignmentNode): + # Make an empty list for it in def_use dict def_use[node] = list() - # Get its uses - for variable in get_uses(node): - # Loop through all the nodes before it - for earlier_node in get_constraint_nodes(node, lattice): - # and add to the 'uses list' of each earlier node, when applicable - if variable in earlier_node.left_hand_side: - def_use[earlier_node].append(node) + + # Get its uses + for variable in node.right_hand_side_variables: + # Loop through most of the nodes before it + for earlier_node in get_constraint_nodes(node, lattice): + # and add to the 'uses list' of each earlier node, when applicable + # 'earlier node' here being a simplification + if variable in earlier_node.left_hand_side: + def_use[earlier_node].append(node) return def_use diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index ddbc1b22..003eb3e5 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -648,7 +648,16 @@ def visit_Call(self, node): return self.add_blackbox_or_builtin_call(node, blackbox=True) return self.add_blackbox_or_builtin_call(node, blackbox=False) - def add_module(self, module, module_or_package_name, local_names, import_alias_mapping, is_init=False, from_from=False, from_fdid=False): + def add_module( + self, + module, + module_or_package_name, + local_names, + import_alias_mapping, + is_init=False, + from_from=False, + from_fdid=False + ): """ Returns: The ExitNode that gets attached to the CFG of the class. diff --git a/pyt/reaching_definitions_taint.py b/pyt/reaching_definitions_taint.py index a93ec544..6e5d613a 100644 --- a/pyt/reaching_definitions_taint.py +++ b/pyt/reaching_definitions_taint.py @@ -1,7 +1,4 @@ -from .base_cfg import ( - AssignmentCallNode, - AssignmentNode -) +from .base_cfg import AssignmentNode from .constraint_table import constraint_table from .reaching_definitions_base import ReachingDefinitionsAnalysisBase @@ -15,14 +12,8 @@ def fixpointmethod(self, cfg_node): if isinstance(cfg_node, AssignmentNode): arrow_result = JOIN - # There are two if statements on purpose - if isinstance(cfg_node, AssignmentCallNode): - # vv_result is necessary to know `image_name = image_name.replace('..', '')` is a reassignment. - if cfg_node.left_hand_side not in cfg_node.vv_result: - # Get previous assignments of cfg_node.left_hand_side and remove them from JOIN - arrow_result = self.arrow(JOIN, cfg_node.left_hand_side) - # Other reassignment check - elif cfg_node.left_hand_side not in cfg_node.right_hand_side_variables: + # Reassignment check + if cfg_node.left_hand_side not in cfg_node.right_hand_side_variables: # Get previous assignments of cfg_node.left_hand_side and remove them from JOIN arrow_result = self.arrow(JOIN, cfg_node.left_hand_side) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index eace2c07..9ec66b28 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -154,31 +154,20 @@ def update_assignments( ): for node in assignment_nodes: for other in assignment_list: - if node not in assignment_list: - append_if_reassigned(assignment_list, other, node, lattice) + if node not in assignment_list and lattice.in_constraint(other, node): + append_node_if_reassigned(assignment_list, other, node) -def append_if_reassigned( +def append_node_if_reassigned( assignment_list, secondary, - node, - lattice + node ): - try: - reassigned = False - # vv_result is necessary to know `image_name = image_name.replace('..', '')` is a reassignment. - if isinstance(node, AssignmentCallNode) and secondary.left_hand_side in node.vv_result: - reassigned = True - elif secondary.left_hand_side in node.right_hand_side_variables: - reassigned = True - elif secondary.left_hand_side == node.left_hand_side: - reassigned = True - if reassigned and lattice.in_constraint(secondary, node): - assignment_list.append(node) - except AttributeError: - print(secondary) - print('EXCEPT' + secondary) - exit(0) + if ( + secondary.left_hand_side in node.right_hand_side_variables or + secondary.left_hand_side == node.left_hand_side + ): + assignment_list.append(node) def find_triggers( @@ -248,8 +237,10 @@ def build_sanitiser_node_dict( sanitiser_node_dict = dict() for sanitiser in sanitisers: - sanitiser_node_dict[sanitiser] = list(find_sanitiser_nodes(sanitiser, - sanitisers_in_file)) + sanitiser_node_dict[sanitiser] = list(find_sanitiser_nodes( + sanitiser, + sanitisers_in_file + )) return sanitiser_node_dict @@ -338,7 +329,7 @@ def how_vulnerable( for i, current_node in enumerate(chain): if current_node in sanitiser_nodes: vuln_deets['sanitiser'] = current_node - return Vulnerability.SANITISED + return VulnerabilityType.SANITISED if isinstance(current_node, BBorBInode): if current_node.func_name in blackbox_mapping['propagates']: @@ -423,22 +414,18 @@ def get_vulnerability( 'reassignment_nodes': source.secondary_nodes } - # Imma get this working and then see if there is a better way - # Maybe using blackbox_mapping.json, maybe not. sanitiser_nodes = set() if sink.sanitisers: - print(f'so sink.sanitisers is {sink.sanitisers}') for sanitiser in sink.sanitisers: - print(f'so triggers.sanitiser_dict[sanitiser] is {triggers.sanitiser_dict[sanitiser]}') for cfg_node in triggers.sanitiser_dict[sanitiser]: - sanitiser_nodes.append(cfg_node) + sanitiser_nodes.add(cfg_node) def_use = build_def_use_chain(cfg.nodes) for chain in get_vulnerability_chains( source.cfg_node, sink.cfg_node, def_use, - [source.cfg_node] + [] ): vulnerability_type = how_vulnerable( chain, diff --git a/pyt/vulnerability_helper.py b/pyt/vulnerability_helper.py index f5208757..7f10a735 100644 --- a/pyt/vulnerability_helper.py +++ b/pyt/vulnerability_helper.py @@ -80,9 +80,9 @@ def __str__(self): reassigned_str = Reassigned(self.reassignment_nodes) return ( 'File: {}\n' - ' > User input at line {}, trigger word "{}": \n' + ' > User input at line {}, trigger word "{}":\n' '\t {}{}\nFile: {}\n' - ' > reaches line {}, trigger word "{}": \n' + ' > reaches line {}, trigger word "{}":\n' '\t{}'.format( self.source.path, self.source.line_number, self.source_trigger_word, @@ -104,12 +104,17 @@ def __init__( source_trigger_word, sink, sink_trigger_word, - sanitiser, - reassignment_nodes + reassignment_nodes, + sanitiser ): """Set source, sink and sanitiser information.""" - super().__init__(source, source_trigger_word, - sink, sink_trigger_word, reassignment_nodes) + super().__init__( + source, + source_trigger_word, + sink, + sink_trigger_word, + reassignment_nodes + ) self.sanitiser = sanitiser def __str__(self): @@ -117,7 +122,7 @@ def __str__(self): return ( super().__str__() + '\nThis vulnerability is potentially sanitised by: ' + - self.sanitiser + str(self.sanitiser) ) @@ -132,12 +137,17 @@ def __init__( source_trigger_word, sink, sink_trigger_word, - unknown_assignment, - reassignment_nodes + reassignment_nodes, + unknown_assignment ): """Set source, sink and blackbox assignment information.""" - super().__init__(source, source_trigger_word, - sink, sink_trigger_word, reassignment_nodes) + super().__init__( + source, + source_trigger_word, + sink, + sink_trigger_word, + reassignment_nodes + ) self.unknown_assignment = unknown_assignment def __str__(self): diff --git a/tests/command_line_test.py b/tests/command_line_test.py index 6749677c..e3c6992c 100644 --- a/tests/command_line_test.py +++ b/tests/command_line_test.py @@ -24,10 +24,10 @@ def test_no_args(self): EXPECTED = """usage: python -m pyt [-h] (-f FILEPATH | -gr GIT_REPOS) [-pr PROJECT_ROOT] [-d] [-o OUTPUT_FILENAME] [-csv CSV_PATH] - [-p | -vp | -trim] [-t TRIGGER_WORD_FILE] [-py2] - [-l LOG_LEVEL] [-a ADAPTOR] [-db] - [-dl DRAW_LATTICE [DRAW_LATTICE ...]] [-li | -re | -rt] - [-intra] [-ppm] + [-p | -vp | -trim | -i] [-t TRIGGER_WORD_FILE] + [-b BLACKBOX_MAPPING_FILE] [-py2] [-l LOG_LEVEL] + [-a ADAPTOR] [-db] [-dl DRAW_LATTICE [DRAW_LATTICE ...]] + [-li | -re | -rt] [-intra] [-ppm] {save,github_search} ...\n""" + \ "python -m pyt: error: one of the arguments " + \ "-f/--filepath -gr/--git-repos is required\n" diff --git a/tests/vulnerabilities_across_files_test.py b/tests/vulnerabilities_across_files_test.py index 0c9a6153..e1fdab51 100644 --- a/tests/vulnerabilities_across_files_test.py +++ b/tests/vulnerabilities_across_files_test.py @@ -3,6 +3,12 @@ from .base_test_case import BaseTestCase from pyt import trigger_definitions_parser, vulnerabilities +from pyt.argument_helpers import ( + default_blackbox_mapping_file, + default_trigger_word_file, + UImode, + VulnerabilityFiles +) from pyt.ast_helper import get_call_names_as_string from pyt.base_cfg import Node from pyt.constraint_table import constraint_table, initialize_constraint_table @@ -31,7 +37,15 @@ def run_analysis(self, path): analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - return vulnerabilities.find_vulnerabilities(cfg_list, ReachingDefinitionsTaintAnalysis) + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + default_trigger_word_file + ) + ) def test_find_vulnerabilities_absolute_from_file_command_injection(self): vulnerability_log = self.run_analysis('example/vulnerable_code_across_files/absolute_from_file_command_injection.py') diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index aeb39478..50154121 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -1,12 +1,28 @@ import os from .base_test_case import BaseTestCase -from pyt import trigger_definitions_parser, vulnerabilities + +from pyt import ( + trigger_definitions_parser, + vulnerabilities +) +from pyt.argument_helpers import ( + default_blackbox_mapping_file, + default_trigger_word_file, + UImode, + VulnerabilityFiles +) from pyt.base_cfg import Node -from pyt.constraint_table import constraint_table, initialize_constraint_table +from pyt.constraint_table import( + constraint_table, + initialize_constraint_table +) from pyt.fixed_point import analyse from pyt.framework_adaptor import FrameworkAdaptor -from pyt.framework_helper import is_django_view_function, is_flask_route_function +from pyt.framework_helper import( + is_django_view_function, + is_flask_route_function +) from pyt.lattice import Lattice from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis @@ -20,7 +36,14 @@ def get_lattice_elements(self, cfg_nodes): return cfg_nodes def test_parse(self): - definitions = vulnerabilities.parse(trigger_word_file=os.path.join(os.getcwd(), 'pyt', 'trigger_definitions', 'test_triggers.pyt')) + definitions = vulnerabilities.parse( + trigger_word_file=os.path.join( + os.getcwd(), + 'pyt', + 'vulnerability_definitions', + 'test_triggers.pyt' + ) + ) self.assert_length(definitions.sources, expected_length=1) self.assert_length(definitions.sinks, expected_length=3) @@ -104,43 +127,43 @@ def test_build_sanitiser_node_dict(self): self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) - def test_is_sanitised_false(self): - cfg_node_1 = Node('Not sanitising at all', None, line_number=None, path=None) - cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) - sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] - sanitiser_dict = {'escape': [cfg_node_1]} + # def test_is_sanitised_false(self): + # cfg_node_1 = Node('Not sanitising at all', None, line_number=None, path=None) + # cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) + # sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] + # sanitiser_dict = {'escape': [cfg_node_1]} - # We should use mock instead - orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements - ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements - lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) + # # We should use mock instead + # orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements + # ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements + # lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) - constraint_table[cfg_node_1] = 0b0 - constraint_table[cfg_node_2] = 0b0 + # constraint_table[cfg_node_1] = 0b0 + # constraint_table[cfg_node_2] = 0b0 - result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) - self.assertEqual(result, False) - # Clean up - ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements + # result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) + # self.assertEqual(result, False) + # # Clean up + # ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements - def test_is_sanitised_true(self): - cfg_node_1 = Node('Awesome sanitiser', None, line_number=None, path=None) - cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) - sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] - sanitiser_dict = {'escape': [cfg_node_1]} + # def test_is_sanitised_true(self): + # cfg_node_1 = Node('Awesome sanitiser', None, line_number=None, path=None) + # cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) + # sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] + # sanitiser_dict = {'escape': [cfg_node_1]} - # We should use mock instead - orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements - ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements + # # We should use mock instead + # orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements + # ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements - lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) - constraint_table[cfg_node_2] = 0b1 + # lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) + # constraint_table[cfg_node_2] = 0b1 - result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) - self.assertEqual(result, True) - # Clean up - ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements + # result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) + # self.assertEqual(result, True) + # # Clean up + # ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements def run_analysis(self, path): self.cfg_create_from_file(path) @@ -151,7 +174,16 @@ def run_analysis(self, path): analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - return vulnerabilities.find_vulnerabilities(cfg_list, ReachingDefinitionsTaintAnalysis) + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + default_trigger_word_file + ) + ) + def test_find_vulnerabilities_assign_other_var(self): vulnerability_log = self.run_analysis('example/vulnerable_code/XSS_assign_to_other_var.py') @@ -252,10 +284,12 @@ def test_path_traversal_sanitised_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: example/vulnerable_code/path_traversal_sanitised.py > User input at line 8, trigger word "request.args.get(": - ¤call_1 = ret_request.args.get('image_name') + ¤call_1 = ret_request.args.get('image_name') Reassigned in: File: example/vulnerable_code/path_traversal_sanitised.py > Line 8: image_name = ¤call_1 + File: example/vulnerable_code/path_traversal_sanitised.py + > Line 10: ¤call_2 = ret_image_name.replace('..', '') File: example/vulnerable_code/path_traversal_sanitised.py > Line 10: image_name = ¤call_2 File: example/vulnerable_code/path_traversal_sanitised.py @@ -265,7 +299,7 @@ def test_path_traversal_sanitised_result(self): File: example/vulnerable_code/path_traversal_sanitised.py > reaches line 12, trigger word "send_file(": ¤call_3 = ret_send_file(¤call_4) - This vulnerability is potentially sanitised by: ["'..'", "'..' in"] + This vulnerability is potentially sanitised by: Label: ¤call_2 = ret_image_name.replace('..', '') """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -373,7 +407,7 @@ def test_XSS_sanitised_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: example/vulnerable_code/XSS_sanitised.py > User input at line 7, trigger word "request.args.get(": - ¤call_1 = ret_request.args.get('param', 'not set') + ¤call_1 = ret_request.args.get('param', 'not set') Reassigned in: File: example/vulnerable_code/XSS_sanitised.py > Line 7: param = ¤call_1 @@ -388,9 +422,9 @@ def test_XSS_sanitised_result(self): File: example/vulnerable_code/XSS_sanitised.py > Line 13: ret_XSS1 = resp File: example/vulnerable_code/XSS_sanitised.py - > reaches line 12, trigger word "replace(": + > reaches line 12, trigger word "replace(": ¤call_5 = ret_html.replace('{{ param }}', param) - This vulnerability is potentially sanitised by: ['escape'] + This vulnerability is potentially sanitised by: Label: ¤call_2 = ret_Markup.escape(param) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -469,9 +503,21 @@ def run_analysis(self, path): analyse(cfg_list, analysis_type=ReachingDefinitionsTaintAnalysis) - trigger_word_file = os.path.join('pyt', 'trigger_definitions', 'django_trigger_words.pyt') - - return vulnerabilities.find_vulnerabilities(cfg_list, ReachingDefinitionsTaintAnalysis, trigger_word_file=trigger_word_file) + trigger_word_file = os.path.join( + 'pyt', + 'vulnerability_definitions', + 'django_trigger_words.pyt' + ) + + return vulnerabilities.find_vulnerabilities( + cfg_list, + ReachingDefinitionsTaintAnalysis, + UImode.NORMAL, + VulnerabilityFiles( + default_blackbox_mapping_file, + trigger_word_file + ) + ) def test_django_view_param(self): vulnerability_log = self.run_analysis('example/vulnerable_code/django_XSS.py') From a5be8be7085ce78ebe77cd7e458dc5d4ffc81e98 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 15 Mar 2018 21:15:24 -0700 Subject: [PATCH 11/20] Mop up reasoning for .args comment, Scour passing in an empty list when we can default arg, Wipe old is_sanitised tests --- pyt/base_cfg.py | 2 +- pyt/vulnerabilities.py | 5 ++--- tests/vulnerabilities_test.py | 38 ----------------------------------- 3 files changed, 3 insertions(+), 42 deletions(-) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index 99bca7ce..d8bec040 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -880,7 +880,7 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_node.label = LHS + " = " + RHS call_node.right_hand_side_variables = rhs_vars - # Used in get_sink_args, not using right_hand_side_variables because it is extended in AssignmentCallNode + # Used in get_sink_args, not using right_hand_side_variables because it is extended in assignment_call_node rhs_visitor = RHSVisitor() rhs_visitor.visit(node) call_node.args = rhs_visitor.result diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 9ec66b28..380d57d6 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -281,7 +281,7 @@ def get_vulnerability_chains( current_node, sink, def_use, - chain + chain=[] ): """Traverses the def-use graph to find all paths from source to sink that cause a vulnerability. @@ -424,8 +424,7 @@ def get_vulnerability( for chain in get_vulnerability_chains( source.cfg_node, sink.cfg_node, - def_use, - [] + def_use ): vulnerability_type = how_vulnerable( chain, diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 50154121..3e4b7e73 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -127,44 +127,6 @@ def test_build_sanitiser_node_dict(self): self.assertEqual(sanitiser_dict['escape'][0], cfg.nodes[3]) - # def test_is_sanitised_false(self): - # cfg_node_1 = Node('Not sanitising at all', None, line_number=None, path=None) - # cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) - # sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] - # sanitiser_dict = {'escape': [cfg_node_1]} - - # # We should use mock instead - # orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements - # ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements - # lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) - - # constraint_table[cfg_node_1] = 0b0 - # constraint_table[cfg_node_2] = 0b0 - - # result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) - # self.assertEqual(result, False) - # # Clean up - # ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements - - - # def test_is_sanitised_true(self): - # cfg_node_1 = Node('Awesome sanitiser', None, line_number=None, path=None) - # cfg_node_2 = Node('something.replace("this", "with this")', None, line_number=None, path=None) - # sinks_in_file = [vulnerabilities.TriggerNode('replace', ['escape'], cfg_node_2)] - # sanitiser_dict = {'escape': [cfg_node_1]} - - # # We should use mock instead - # orginal_get_lattice_elements = ReachingDefinitionsTaintAnalysis.get_lattice_elements - # ReachingDefinitionsTaintAnalysis.get_lattice_elements = self.get_lattice_elements - - # lattice = Lattice([cfg_node_1, cfg_node_2], analysis_type=ReachingDefinitionsTaintAnalysis) - # constraint_table[cfg_node_2] = 0b1 - - # result = vulnerabilities.is_sanitised(sinks_in_file[0], sanitiser_dict, lattice) - # self.assertEqual(result, True) - # # Clean up - # ReachingDefinitionsTaintAnalysis.get_lattice_elements = orginal_get_lattice_elements - def run_analysis(self, path): self.cfg_create_from_file(path) cfg_list = [self.cfg] From 84a25f2a58a4bb21d542bc09b760515bcbc5d3cf Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Wed, 21 Mar 2018 19:48:16 -0700 Subject: [PATCH 12/20] Get path_traversal_sanitised_2 to work, remove a bad connect in save_def_args_in_temp, make all tests pass, change double quotes to single quotes, cleanup stmt_star_handler/connect_if_allowed/comments/docstrings --- example/vulnerable_code/ensure_saved_scope.py | 25 +++ .../path_traversal_sanitised_2.py | 2 +- pyt/argument_helpers.py | 2 +- pyt/base_cfg.py | 66 +++--- pyt/definition_chains.py | 1 - pyt/interprocedural_cfg.py | 110 +++++---- pyt/intraprocedural_cfg.py | 12 +- pyt/vulnerabilities.py | 46 ++-- .../blackbox_mapping.json | 1 + pyt/vulnerability_helper.py | 10 +- tests/cfg_test.py | 212 +++++++++--------- tests/vulnerabilities_test.py | 97 ++++++-- 12 files changed, 350 insertions(+), 234 deletions(-) create mode 100644 example/vulnerable_code/ensure_saved_scope.py diff --git a/example/vulnerable_code/ensure_saved_scope.py b/example/vulnerable_code/ensure_saved_scope.py new file mode 100644 index 00000000..772a1600 --- /dev/null +++ b/example/vulnerable_code/ensure_saved_scope.py @@ -0,0 +1,25 @@ +import os +from flask import Flask, request, send_file + +app = Flask(__name__) + +def outer(outer_arg, other_arg): + outer_ret_val = outer_arg + 'hey' + other_arg + return outer_ret_val + +def inner(): + return 'boom' + +@app.route('/') +def cat_picture(): + image_name = request.args.get('image_name') + if not image_name: + image_name = 'foo' + return 404 + foo = outer(inner(), image_name) # Nested call after if caused the problem + send_file(image_name) + return 'idk' + + +if __name__ == '__main__': + app.run(debug=True) diff --git a/example/vulnerable_code/path_traversal_sanitised_2.py b/example/vulnerable_code/path_traversal_sanitised_2.py index 2c85840f..afe1ac11 100644 --- a/example/vulnerable_code/path_traversal_sanitised_2.py +++ b/example/vulnerable_code/path_traversal_sanitised_2.py @@ -7,7 +7,7 @@ def cat_picture(): image_name = request.args.get('image_name') - if not '..' in image_name: + if '..' in image_name: return 404 return send_file(os.path.join(os.getcwd(), image_name)) diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index a020431e..05b4542c 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -35,7 +35,7 @@ class UImode(Enum): VulnerabilityFiles = namedtuple( - 'VulnerabilityFiles', + 'VulnerabilityFiles', [ 'blackbox_mapping', 'triggers' diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index d8bec040..67c1d085 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -323,20 +323,21 @@ def get_inner_most_function_call(self, call_node): call_node = call_node.inner_most_call else: try: - call_node = call_node.first_node.inner_most_call + # e.g. save_2_blah, even when there is a save_3_blah + call_node = call_node.first_node except AttributeError: - try: - call_node = call_node.first_node - except AttributeError: - # No inner calls - # Possible improvement: Make new node for RestoreNode's made in process_function - # and make `self.inner_most_call = self` - pass + # No inner calls + # Possible improvement: Make new node for RestoreNode's made in process_function + # and make `self.inner_most_call = self` + # So that we can duck type and not catch an exception when there are no inner calls. + # This is what we do in BBorBInode + pass + return call_node def connect_control_flow_node(self, control_flow_node, next_node): """Connect a ControlFlowNode properly to the next_node.""" - for last in control_flow_node[1]: # list of last nodes in ifs and elifs + for last in control_flow_node.last_nodes: if isinstance(next_node, ControlFlowNode): last.connect(next_node.test) # connect to next if test case elif isinstance(next_node, AssignmentCallNode): @@ -349,10 +350,10 @@ def connect_control_flow_node(self, control_flow_node, next_node): def connect_nodes(self, nodes): """Connect the nodes in a list linearly.""" for n, next_node in zip(nodes, nodes[1:]): - if isinstance(n, ControlFlowNode): # case for if + if isinstance(n, ControlFlowNode): self.connect_control_flow_node(n, next_node) - elif isinstance(next_node, ControlFlowNode): # case for if - n.connect(next_node[0]) + elif isinstance(next_node, ControlFlowNode): + n.connect(next_node.test) elif isinstance(next_node, RestoreNode): continue elif CALL_IDENTIFIER in next_node.label: @@ -375,18 +376,19 @@ def stmt_star_handler(self, stmts, prev_node_to_avoid=None): break_nodes = list() cfg_statements = list() - if prev_node_to_avoid: - self.prev_nodes_to_avoid.append(prev_node_to_avoid) + self.prev_nodes_to_avoid.append(prev_node_to_avoid) + self.last_control_flow_nodes.append(None) first_node = None node_not_to_step_past = self.nodes[-1] for stmt in stmts: node = self.visit(stmt) - if isinstance(stmt, ast.While) or isinstance(stmt, ast.For): - self.last_was_loop_stack.append(True) + + if isinstance(node, ControlFlowNode) and node.test.label != 'Try': + self.last_control_flow_nodes.append(node.test) else: - self.last_was_loop_stack.append(False) + self.last_control_flow_nodes.append(None) if isinstance(node, ControlFlowNode): break_nodes.extend(node.break_statements) @@ -419,9 +421,9 @@ def stmt_star_handler(self, stmts, prev_node_to_avoid=None): else: first_node = node cfg_statements.append(node) - if prev_node_to_avoid: - self.prev_nodes_to_avoid.pop() - self.last_was_loop_stack.pop() + + self.prev_nodes_to_avoid.pop() + self.last_control_flow_nodes.pop() self.connect_nodes(cfg_statements) @@ -453,6 +455,10 @@ def add_elif_label(self, CFG_node): def handle_or_else(self, orelse, test): """Handle the orelse part of an if or try node. + Args: + orelse(list[Node]) + test(Node) + Returns: The last nodes of the orelse branch. """ @@ -462,7 +468,10 @@ def handle_or_else(self, orelse, test): test.connect(control_flow_node.test) return control_flow_node.last_nodes else: - else_connect_statements = self.stmt_star_handler(orelse, prev_node_to_avoid=self.nodes[-1]) + else_connect_statements = self.stmt_star_handler( + orelse, + prev_node_to_avoid=self.nodes[-1] + ) test.connect(else_connect_statements.first_statement) return else_connect_statements.last_statements @@ -721,7 +730,10 @@ def visit_AugAssign(self, node): def loop_node_skeleton(self, test, node): """Common handling of looped structures, while and for.""" - body_connect_stmts = self.stmt_star_handler(node.body, prev_node_to_avoid=self.nodes[-1]) + body_connect_stmts = self.stmt_star_handler( + node.body, + prev_node_to_avoid=self.nodes[-1] + ) test.connect(body_connect_stmts.first_statement) test.connect_predecessors(body_connect_stmts.last_statements) @@ -732,7 +744,10 @@ def loop_node_skeleton(self, test, node): last_nodes.extend(body_connect_stmts.break_statements) if node.orelse: - orelse_connect_stmts = self.stmt_star_handler(node.orelse, prev_node_to_avoid=self.nodes[-1]) + orelse_connect_stmts = self.stmt_star_handler( + node.orelse, + prev_node_to_avoid=self.nodes[-1] + ) test.connect(orelse_connect_stmts.first_statement) last_nodes.extend(orelse_connect_stmts.last_statements) @@ -816,16 +831,13 @@ def add_blackbox_or_builtin_call(self, node, blackbox): call_label.visit(node) index = call_label.result.find('(') - if index == -1: - print("No ( in a call") - raise # Create e.g. ¤call_1 = ret_func_foo LHS = CALL_IDENTIFIER + 'call_' + str(saved_function_call_index) RHS = 'ret_' + call_label.result[:index] + '(' call_node = BBorBInode( - label="", + label='', left_hand_side=LHS, right_hand_side_variables=[], line_number=node.lineno, diff --git a/pyt/definition_chains.py b/pyt/definition_chains.py index e61c3c01..81eee885 100644 --- a/pyt/definition_chains.py +++ b/pyt/definition_chains.py @@ -41,7 +41,6 @@ def get_constraint_nodes(node, lattice): yield n -# TKTK: Clean up use-def too def build_use_def_chain(cfg_nodes): use_def = dict() lattice = Lattice(cfg_nodes, ReachingDefinitionsAnalysis) diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index 003eb3e5..23ff1404 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -80,7 +80,7 @@ def __init__(self, node, project_modules, local_modules, self.function_return_stack = list() self.module_definitions_stack = list() self.prev_nodes_to_avoid = list() - self.last_was_loop_stack = list() + self.last_control_flow_nodes = list() # Are we already in a module? if module_definitions: @@ -91,7 +91,7 @@ def __init__(self, node, project_modules, local_modules, def init_cfg(self, node): self.module_definitions_stack.append(ModuleDefinitions(filename=self.filenames[-1])) - entry_node = self.append_node(EntryOrExitNode("Entry module")) + entry_node = self.append_node(EntryOrExitNode('Entry module')) module_statements = self.visit(node) @@ -105,12 +105,12 @@ def init_cfg(self, node): if CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) last_nodes = module_statements.last_statements exit_node.connect_predecessors(last_nodes) else: - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) entry_node.connect(exit_node) def init_function_cfg(self, node, module_definitions): @@ -119,7 +119,7 @@ def init_function_cfg(self, node, module_definitions): self.function_names.append(node.name) self.function_return_stack.append(node.name) - entry_node = self.append_node(EntryOrExitNode("Entry function")) + entry_node = self.append_node(EntryOrExitNode('Entry function')) module_statements = self.stmt_star_handler(node.body) @@ -128,7 +128,7 @@ def init_function_cfg(self, node, module_definitions): if CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) - exit_node = self.append_node(EntryOrExitNode("Exit function")) + exit_node = self.append_node(EntryOrExitNode('Exit function')) last_nodes = module_statements.last_statements exit_node.connect_predecessors(last_nodes) @@ -302,38 +302,37 @@ def save_local_scope(self, line_number, saved_function_call_index): return (saved_variables, first_node) def connect_if_allowed(self, previous_node, node_to_connect_to): - try: - # Do not connect if last statement was a loop e.g. - # while x != 10: - # if x > 0: - # print(x) - # break - # else: - # print('hest') - # print('next') # self.nodes[-1] is print('hest') - if self.last_was_loop_stack[-1]: - return - except IndexError: - pass - try: - if previous_node is not self.prev_nodes_to_avoid[-1]: - previous_node.connect(node_to_connect_to) - except IndexError: - # If there are no prev_nodes_to_avoid, we just connect safely. - # Except in this case: - # - # if not image_name: - # return 404 - # print('foo') # We do not want to connect this line with `return 404` - if not isinstance(previous_node, ReturnNode): - previous_node.connect(node_to_connect_to) - - def save_def_args_in_temp(self, - call_args, - def_args, - line_number, - saved_function_call_index, - first_node): + # e.g. + # while x != 10: + # if x > 0: + # print(x) + # break + # else: + # print('hest') + # print('next') # self.nodes[-1] is print('hest') + # + # So we connect to `while x!= 10` instead + if self.last_control_flow_nodes[-1]: + self.last_control_flow_nodes[-1].connect(node_to_connect_to) + self.last_control_flow_nodes[-1] = None + return + + # Except in this case: + # + # if not image_name: + # return 404 + # print('foo') # We do not want to connect this line with `return 404` + if previous_node is not self.prev_nodes_to_avoid[-1] and not isinstance(previous_node, ReturnNode): + previous_node.connect(node_to_connect_to) + + def save_def_args_in_temp( + self, + call_args, + def_args, + line_number, + saved_function_call_index, + first_node + ): """Save the arguments of the definition being called. Visit the arguments if they're calls. Args: @@ -407,18 +406,15 @@ def save_def_args_in_temp(self, else: args_mapping[def_args[i]] = call_arg_label_visitor.result - # After args loop - if last_return_value_of_nested_call: - # connect other_inner to outer in e.g. `outer(inner(image_name), other_inner(image_name))` - last_return_value_of_nested_call.connect(first_node) - return (args_mapping, first_node) - def create_local_scope_from_def_args(self, - call_args, - def_args, - line_number, - saved_function_call_index): + def create_local_scope_from_def_args( + self, + call_args, + def_args, + line_number, + saved_function_call_index + ): """Create the local scope before entering the body of a function call. Args: @@ -445,10 +441,12 @@ def create_local_scope_from_def_args(self, self.nodes[-1].connect(local_scope_node) self.nodes.append(local_scope_node) - def restore_saved_local_scope(self, - saved_variables, - args_mapping, - line_number): + def restore_saved_local_scope( + self, + saved_variables, + args_mapping, + line_number + ): """Restore the previously saved variables to their original values. Args: @@ -603,7 +601,7 @@ def visit_and_get_function_nodes(self, definition, first_node): """ len_before_visiting_func = len(self.nodes) previous_node = self.nodes[-1] - entry_node = self.append_node(EntryOrExitNode("Function Entry " + + entry_node = self.append_node(EntryOrExitNode('Function Entry ' + definition.name)) if not first_node: first_node = entry_node @@ -612,7 +610,7 @@ def visit_and_get_function_nodes(self, definition, first_node): function_body_connect_statements = self.stmt_star_handler(definition.node.body) entry_node.connect(function_body_connect_statements.first_statement) - exit_node = self.append_node(EntryOrExitNode("Exit " + definition.name)) + exit_node = self.append_node(EntryOrExitNode('Exit ' + definition.name)) exit_node.connect_predecessors(function_body_connect_statements.last_statements) the_new_nodes = self.nodes[len_before_visiting_func:] @@ -790,7 +788,7 @@ def from_directory_import(self, module, real_names, local_names, import_alias_ma from_fdid=True ) else: - raise Exception("from anything import directory needs an __init__.py file in directory") + raise Exception('from anything import directory needs an __init__.py file in directory') else: file_module = (real_name, full_name + '.py') self.add_module( @@ -814,7 +812,7 @@ def import_package(self, module, module_name, local_name, import_alias_mapping): is_init=True ) else: - raise Exception("import directory needs an __init__.py file") + raise Exception('import directory needs an __init__.py file') def visit_Import(self, node): for name in node.names: diff --git a/pyt/intraprocedural_cfg.py b/pyt/intraprocedural_cfg.py index 5c990f23..1fe175b7 100644 --- a/pyt/intraprocedural_cfg.py +++ b/pyt/intraprocedural_cfg.py @@ -32,7 +32,7 @@ def __init__(self, node, filename): self.init_module_cfg(node) def init_module_cfg(self, node): - entry_node = self.append_node(EntryOrExitNode("Entry module")) + entry_node = self.append_node(EntryOrExitNode('Entry module')) module_statements = self.visit(node) @@ -46,21 +46,21 @@ def init_module_cfg(self, node): if CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) last_nodes = module_statements.last_statements exit_node.connect_predecessors(last_nodes) else: - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) entry_node.connect(exit_node) def init_function_cfg(self, node): - entry_node = self.append_node(EntryOrExitNode("Entry module")) + entry_node = self.append_node(EntryOrExitNode('Entry module')) module_statements = self.stmt_star_handler(node.body) if isinstance(module_statements, IgnoredNode): - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) entry_node.connect(exit_node) return @@ -68,7 +68,7 @@ def init_function_cfg(self, node): if CALL_IDENTIFIER not in first_node.label: entry_node.connect(first_node) - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) last_nodes = module_statements.last_statements exit_node.connect_predecessors(last_nodes) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 380d57d6..d0640c53 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -122,7 +122,7 @@ def find_secondary_sources( Args: assignment_nodes([AssignmentNode]) sources([tuple]) - lattice(Lattice): The lattice we're analysing. + lattice(Lattice): the lattice we're analysing. """ for source in sources: source.secondary_nodes = find_assignments(assignment_nodes, source, lattice) @@ -134,15 +134,16 @@ def find_assignments( lattice ): old = list() - - # added in order to propagate reassignments of the source node + # propagate reassignments of the source node new = [source.cfg_node] - update_assignments(new, assignment_nodes, source.cfg_node, lattice) while new != old: - old = new update_assignments(new, assignment_nodes, source.cfg_node, lattice) - new.remove(source.cfg_node) # remove source node from result + old = new + + # remove source node from result + del new[0] + return new @@ -289,7 +290,7 @@ def get_vulnerability_chains( current_node() sink() def_use(dict): - chain(list(Node)): + chain(list(Node)): A path of nodes between source to sink. """ for use in def_use[current_node]: if use == sink: @@ -309,16 +310,21 @@ def how_vulnerable( chain, blackbox_mapping, sanitiser_nodes, + potential_sanitiser, blackbox_assignments, ui_mode, vuln_deets ): """Iterates through the chain of nodes and checks the blackbox nodes against the blackbox mapping and sanitiser dictionary. + Note: potential_sanitiser is the only hack here, it is because we do not take p-use's into account yet. + e.g. we can only say potentially instead of definitely sanitised in the path_traversal_sanitised_2.py test. + Args: - chain(list(Node)): TODO - blackbox_mapping(dict): - sanitiser_nodes(set): + chain(list(Node)): A path of nodes between source to sink. + blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. + sanitiser_nodes(set): A set of nodes that are sanitisers for the sink. + potential_sanitiser(Node): An if or elif node that can potentially cause sanitisation. blackbox_assignments(set[AssignmentNode]): set of blackbox assignments, includes the ReturnNode's of BBorBInode's. ui_mode(UImode): determines if we interact with the user when we don't already have a blackbox mapping available. vuln_deets(dict): vulnerability details. @@ -329,6 +335,7 @@ def how_vulnerable( for i, current_node in enumerate(chain): if current_node in sanitiser_nodes: vuln_deets['sanitiser'] = current_node + vuln_deets['definite'] = True return VulnerabilityType.SANITISED if isinstance(current_node, BBorBInode): @@ -350,6 +357,12 @@ def how_vulnerable( else: vuln_deets['unknown_assignment'] = current_node return VulnerabilityType.UNKNOWN + + if potential_sanitiser: + vuln_deets['sanitiser'] = potential_sanitiser + vuln_deets['definite'] = False + return VulnerabilityType.SANITISED + return VulnerabilityType.TRUE @@ -386,10 +399,10 @@ def get_vulnerability( source(TriggerNode): TriggerNode of the source. sink(TriggerNode): TriggerNode of the sink. triggers(Triggers): Triggers of the CFG. - lattice(Lattice): The lattice we're analysing. + lattice(Lattice): the lattice we're analysing. cfg(CFG): .blackbox_assignments used in is_unknown, .nodes used in build_def_use_chain ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. - blackbox_mapping(dict): TODO + blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. Returns: A Vulnerability if it exists, else None @@ -415,10 +428,13 @@ def get_vulnerability( } sanitiser_nodes = set() + potential_sanitiser = None if sink.sanitisers: for sanitiser in sink.sanitisers: for cfg_node in triggers.sanitiser_dict[sanitiser]: sanitiser_nodes.add(cfg_node) + if 'if' in cfg_node.label and cfg_node.label.endswith(':'): + potential_sanitiser = cfg_node def_use = build_def_use_chain(cfg.nodes) for chain in get_vulnerability_chains( @@ -430,12 +446,14 @@ def get_vulnerability( chain, blackbox_mapping, sanitiser_nodes, + potential_sanitiser, cfg.blackbox_assignments, ui_mode, vuln_deets ) if vulnerability_type == VulnerabilityType.FALSE: continue + if ui_mode != UImode.NORMAL: vuln_deets['reassignment_nodes'] = chain @@ -458,9 +476,9 @@ def find_vulnerabilities_in_cfg( cfg(CFG): The CFG to find vulnerabilities in. vulnerability_log(vulnerability_log.VulnerabilityLog): The log in which to place found vulnerabilities. definitions(trigger_definitions_parser.Definitions): Source and sink definitions. - lattice(Lattice): The lattice we're analysing. + lattice(Lattice): the lattice we're analysing. ui_mode(UImode): determines if we interact with the user or trim the nodes in the output, if at all. - blackbox_mapping(dict): TODO + blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. """ triggers = identify_triggers( cfg, diff --git a/pyt/vulnerability_definitions/blackbox_mapping.json b/pyt/vulnerability_definitions/blackbox_mapping.json index 96c749fd..fbb229e3 100644 --- a/pyt/vulnerability_definitions/blackbox_mapping.json +++ b/pyt/vulnerability_definitions/blackbox_mapping.json @@ -4,6 +4,7 @@ "url_for" ], "propagates": [ + "os.path.join", "graham", "minnesota_fats" ] diff --git a/pyt/vulnerability_helper.py b/pyt/vulnerability_helper.py index 7f10a735..0e26c03e 100644 --- a/pyt/vulnerability_helper.py +++ b/pyt/vulnerability_helper.py @@ -39,7 +39,7 @@ def __init__(self, reassignment_nodes): def __str__(self): reassignment = '' if self.reassignment_nodes: - reassignment += '\nReassigned in: \n\t' + reassignment += '\nReassigned in:\n\t' reassignment += '\n\t'.join([ 'File: ' + node.path + '\n\t' + ' > Line ' + str(node.line_number) + ': ' + @@ -105,7 +105,8 @@ def __init__( sink, sink_trigger_word, reassignment_nodes, - sanitiser + sanitiser, + definite ): """Set source, sink and sanitiser information.""" super().__init__( @@ -116,12 +117,15 @@ def __init__( reassignment_nodes ) self.sanitiser = sanitiser + self.definite = definite def __str__(self): """Pretty printing of a vulnerability.""" return ( super().__str__() + - '\nThis vulnerability is potentially sanitised by: ' + + '\nThis vulnerability is ' + + ('' if self.definite else 'potentially ') + + 'sanitised by: ' + str(self.sanitiser) ) diff --git a/tests/cfg_test.py b/tests/cfg_test.py index 08edb251..9459c88e 100644 --- a/tests/cfg_test.py +++ b/tests/cfg_test.py @@ -788,16 +788,17 @@ def test_blackbox_call_after_if(self): ret_send_file = 7 _exit = 8 - self.assertInCfg([(ret_request, entry), - (image_name_equals_call_1, ret_request), - (_if, image_name_equals_call_1), - (image_name_equals_foo, _if), - (blackbox_call, _if), - (blackbox_call, image_name_equals_foo), - (foo_equals_call_2, blackbox_call), - (ret_send_file, foo_equals_call_2), - (_exit, ret_send_file) - ]) + self.assertInCfg([ + (ret_request, entry), + (image_name_equals_call_1, ret_request), + (_if, image_name_equals_call_1), + (image_name_equals_foo, _if), + (blackbox_call, _if), + (blackbox_call, image_name_equals_foo), + (foo_equals_call_2, blackbox_call), + (ret_send_file, foo_equals_call_2), + (_exit, ret_send_file) + ]) def test_multiple_nested_user_defined_calls_after_if(self): path = 'example/vulnerable_code/multiple_nested_user_defined_calls_after_if.py' @@ -850,53 +851,50 @@ def test_multiple_nested_user_defined_calls_after_if(self): ret_send_file = 37 _exit = 38 - self.assertInCfg([(ret_request, entry), - (image_name_equals_call_1, ret_request), - (_if, image_name_equals_call_1), - (image_name_equals_foo, _if), - # (call_2_equals_ret_outer, _if), ## (Before) NO NO NO - # (call_2_equals_ret_outer, image_name_equals_foo), ## (Before) NO NO NO - (save_3_image_name, _if), ## (After) Aww yeah, feels so good - (save_3_image_name, image_name_equals_foo), ## (After) Aww yeah, feels so good - (save_2_image_name, image_name_equals_foo), - - (save_3_image_name, save_2_image_name), - (temp_3_first_inner_arg, save_3_image_name), - (inner_arg_equals_temp_3, temp_3_first_inner_arg), - (function_entry_first_inner, inner_arg_equals_temp_3), - (first_inner_ret_val_assign_1st, function_entry_first_inner), - (ret_first_inner, first_inner_ret_val_assign_1st), - (function_exit_first_inner, ret_first_inner), - (image_name_equals_first_inner_arg, function_exit_first_inner), - (call_3_equals_ret_first_inner, image_name_equals_first_inner_arg), - (save_4_image_name, call_3_equals_ret_first_inner), - (temp_2_first_arg_equals_call_3, call_3_equals_ret_first_inner), - (save_4_image_name, temp_2_first_arg_equals_call_3), - (save_4_inner_ret_val, save_4_image_name), - (temp_4_inner_arg, save_4_inner_ret_val), - (inner_arg_equals_temp_4, temp_4_inner_arg), - (function_entry_second_inner, inner_arg_equals_temp_4), - (second_inner_ret_val_assign_2nd, function_entry_second_inner), - (ret_second_inner, second_inner_ret_val_assign_2nd), - (function_exit_second_inner, ret_second_inner), - (image_name_equals_second_inner_arg, function_exit_second_inner), - (first_inner_ret_val_equals_save_4, image_name_equals_second_inner_arg), - (call_4_equals_ret_second_inner, first_inner_ret_val_equals_save_4), - (save_2_image_name, call_4_equals_ret_second_inner), - (temp_2_second_arg_equals_call_4, call_4_equals_ret_second_inner), - (first_arg_equals_temp, temp_2_second_arg_equals_call_4), - (second_arg_equals_temp, first_arg_equals_temp), - (function_entry_outer, second_arg_equals_temp), - (outer_ret_val_assignment, function_entry_outer), - (ret_outer, outer_ret_val_assignment), - (function_exit_outer, ret_outer), - (image_name_restore, function_exit_outer), - (call_2_equals_ret_outer, image_name_restore), - - (foo_equals_call_2, call_2_equals_ret_outer), - (ret_send_file, foo_equals_call_2), - (_exit, ret_send_file) - ]) + self.assertInCfg([ + (ret_request, entry), + (image_name_equals_call_1, ret_request), + (_if, image_name_equals_call_1), + (image_name_equals_foo, _if), + (save_2_image_name, _if), + (save_2_image_name, image_name_equals_foo), + + (save_3_image_name, save_2_image_name), + (temp_3_first_inner_arg, save_3_image_name), + (inner_arg_equals_temp_3, temp_3_first_inner_arg), + (function_entry_first_inner, inner_arg_equals_temp_3), + (first_inner_ret_val_assign_1st, function_entry_first_inner), + (ret_first_inner, first_inner_ret_val_assign_1st), + (function_exit_first_inner, ret_first_inner), + (image_name_equals_first_inner_arg, function_exit_first_inner), + (call_3_equals_ret_first_inner, image_name_equals_first_inner_arg), + (save_4_image_name, call_3_equals_ret_first_inner), + (temp_2_first_arg_equals_call_3, call_3_equals_ret_first_inner), + (save_4_image_name, temp_2_first_arg_equals_call_3), + (save_4_inner_ret_val, save_4_image_name), + (temp_4_inner_arg, save_4_inner_ret_val), + (inner_arg_equals_temp_4, temp_4_inner_arg), + (function_entry_second_inner, inner_arg_equals_temp_4), + (second_inner_ret_val_assign_2nd, function_entry_second_inner), + (ret_second_inner, second_inner_ret_val_assign_2nd), + (function_exit_second_inner, ret_second_inner), + (image_name_equals_second_inner_arg, function_exit_second_inner), + (first_inner_ret_val_equals_save_4, image_name_equals_second_inner_arg), + (call_4_equals_ret_second_inner, first_inner_ret_val_equals_save_4), + (temp_2_second_arg_equals_call_4, call_4_equals_ret_second_inner), + (first_arg_equals_temp, temp_2_second_arg_equals_call_4), + (second_arg_equals_temp, first_arg_equals_temp), + (function_entry_outer, second_arg_equals_temp), + (outer_ret_val_assignment, function_entry_outer), + (ret_outer, outer_ret_val_assignment), + (function_exit_outer, ret_outer), + (image_name_restore, function_exit_outer), + (call_2_equals_ret_outer, image_name_restore), + + (foo_equals_call_2, call_2_equals_ret_outer), + (ret_send_file, foo_equals_call_2), + (_exit, ret_send_file) + ]) def test_multiple_nested_blackbox_calls_after_for(self): path = 'example/vulnerable_code/multiple_nested_blackbox_calls_after_for.py' @@ -916,18 +914,19 @@ def test_multiple_nested_blackbox_calls_after_for(self): ret_send_file = 9 _exit = 10 - self.assertInCfg([(ret_request, entry), - (image_name_equals_call_1, ret_request), - (_for, image_name_equals_call_1), - (ret_print, _for), - (_for, ret_print), - (inner_blackbox_call, _for), - (second_inner_blackbox_call, inner_blackbox_call), - (outer_blackbox_call, second_inner_blackbox_call), - (foo_equals_call_3, outer_blackbox_call), - (ret_send_file, foo_equals_call_3), - (_exit, ret_send_file) - ]) + self.assertInCfg([ + (ret_request, entry), + (image_name_equals_call_1, ret_request), + (_for, image_name_equals_call_1), + (ret_print, _for), + (_for, ret_print), + (inner_blackbox_call, _for), + (second_inner_blackbox_call, inner_blackbox_call), + (outer_blackbox_call, second_inner_blackbox_call), + (foo_equals_call_3, outer_blackbox_call), + (ret_send_file, foo_equals_call_3), + (_exit, ret_send_file) + ]) def test_multiple_blackbox_calls_in_user_defined_call_after_if(self): path = 'example/vulnerable_code/multiple_blackbox_calls_in_user_defined_call_after_if.py' @@ -972,43 +971,42 @@ def test_multiple_blackbox_calls_in_user_defined_call_after_if(self): ret_send_file = 30 _exit = 31 - self.assertInCfg([(save_2_image_name, ret_scrypt_third), # Makes sense - (ret_scrypt_first, _if), # Makes sense - (ret_scrypt_first, image_name_equals_foo), # Makes sense - (save_4_image_name, ret_scrypt_first), # Makes sense - (ret_scrypt_third, call_4_equals_ret_second_inner), # Makes sense - (ret_request, entry), - (image_name_equals_call_1, ret_request), - (_if, image_name_equals_call_1), - (image_name_equals_foo, _if), - (save_2_image_name, image_name_equals_foo), - (ret_scrypt_first, save_2_image_name), - (temp_2_first_arg, ret_scrypt_first), - (save_4_image_name, temp_2_first_arg), - (temp_4_inner_arg, save_4_image_name), - (inner_arg_equals_temp_4, temp_4_inner_arg), - (function_entry_second_inner, inner_arg_equals_temp_4), - (inner_ret_val_equals_inner_arg_2nd, function_entry_second_inner), - (ret_second_inner, inner_ret_val_equals_inner_arg_2nd), - (function_exit_second_inner, ret_second_inner), - (image_name_equals_save_4, function_exit_second_inner), - (call_4_equals_ret_second_inner, image_name_equals_save_4), - (temp_2_second_arg, call_4_equals_ret_second_inner), - (ret_scrypt_third, temp_2_second_arg), - (temp_2_third_arg_equals_call_5, ret_scrypt_third), - (first_arg_equals_temp, temp_2_third_arg_equals_call_5), - (second_arg_equals_temp, first_arg_equals_temp), - (third_arg_equals_temp, second_arg_equals_temp), - (function_entry_outer, third_arg_equals_temp), - (outer_ret_val, function_entry_outer), - (ret_outer, outer_ret_val), - (exit_outer, ret_outer), - (image_name_equals_save_2, exit_outer), - (call_2_equals_ret_outer, image_name_equals_save_2), - (foo_equals_call_2, call_2_equals_ret_outer), - (ret_send_file, foo_equals_call_2), - (_exit, ret_send_file) - ]) + self.assertInCfg([ + (save_2_image_name, _if), + (save_4_image_name, ret_scrypt_first), + (ret_scrypt_third, call_4_equals_ret_second_inner), + (ret_request, entry), + (image_name_equals_call_1, ret_request), + (_if, image_name_equals_call_1), + (image_name_equals_foo, _if), + (save_2_image_name, image_name_equals_foo), + (ret_scrypt_first, save_2_image_name), + (temp_2_first_arg, ret_scrypt_first), + (save_4_image_name, temp_2_first_arg), + (temp_4_inner_arg, save_4_image_name), + (inner_arg_equals_temp_4, temp_4_inner_arg), + (function_entry_second_inner, inner_arg_equals_temp_4), + (inner_ret_val_equals_inner_arg_2nd, function_entry_second_inner), + (ret_second_inner, inner_ret_val_equals_inner_arg_2nd), + (function_exit_second_inner, ret_second_inner), + (image_name_equals_save_4, function_exit_second_inner), + (call_4_equals_ret_second_inner, image_name_equals_save_4), + (temp_2_second_arg, call_4_equals_ret_second_inner), + (ret_scrypt_third, temp_2_second_arg), + (temp_2_third_arg_equals_call_5, ret_scrypt_third), + (first_arg_equals_temp, temp_2_third_arg_equals_call_5), + (second_arg_equals_temp, first_arg_equals_temp), + (third_arg_equals_temp, second_arg_equals_temp), + (function_entry_outer, third_arg_equals_temp), + (outer_ret_val, function_entry_outer), + (ret_outer, outer_ret_val), + (exit_outer, ret_outer), + (image_name_equals_save_2, exit_outer), + (call_2_equals_ret_outer, image_name_equals_save_2), + (foo_equals_call_2, call_2_equals_ret_outer), + (ret_send_file, foo_equals_call_2), + (_exit, ret_send_file) + ]) @@ -1051,7 +1049,7 @@ def test_multiple_user_defined_calls_in_blackbox_call_after_if(self): ret_send_file = 28 _exit = 29 - self.assertInCfg([(save_3_image_name, _if), # Makes sense + self.assertInCfg([(save_3_image_name, _if), (ret_request, entry), (image_name_equals_call_1, ret_request), (_if, image_name_equals_call_1), diff --git a/tests/vulnerabilities_test.py b/tests/vulnerabilities_test.py index 3e4b7e73..3d875e37 100644 --- a/tests/vulnerabilities_test.py +++ b/tests/vulnerabilities_test.py @@ -167,7 +167,7 @@ def test_XSS_result(self): File: example/vulnerable_code/XSS.py > User input at line 6, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS.py > Line 6: param = ¤call_1 File: example/vulnerable_code/XSS.py @@ -191,7 +191,7 @@ def test_command_injection_result(self): File: example/vulnerable_code/command_injection.py > User input at line 15, trigger word "form[": param = request.form['suggestion'] - Reassigned in: + Reassigned in: File: example/vulnerable_code/command_injection.py > Line 16: command = 'echo ' + param + ' >> ' + 'menu.txt' File: example/vulnerable_code/command_injection.py @@ -208,10 +208,12 @@ def test_path_traversal_result(self): EXPECTED_VULNERABILITY_DESCRIPTION = """ File: example/vulnerable_code/path_traversal.py > User input at line 15, trigger word "request.args.get(": - ¤call_1 = ret_request.args.get('image_name') - Reassigned in: + ¤call_1 = ret_request.args.get('image_name') + Reassigned in: File: example/vulnerable_code/path_traversal.py > Line 15: image_name = ¤call_1 + File: example/vulnerable_code/path_traversal.py + > Line 6: save_2_image_name = image_name File: example/vulnerable_code/path_traversal.py > Line 10: save_3_image_name = image_name File: example/vulnerable_code/path_traversal.py @@ -224,14 +226,12 @@ def test_path_traversal_result(self): > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg File: example/vulnerable_code/path_traversal.py > Line 8: ret_outer = outer_ret_val + File: example/vulnerable_code/path_traversal.py + > Line 6: image_name = save_2_image_name File: example/vulnerable_code/path_traversal.py > Line 19: ¤call_2 = ret_outer File: example/vulnerable_code/path_traversal.py > Line 19: foo = ¤call_2 - File: example/vulnerable_code/path_traversal.py - > Line 6: save_2_image_name = image_name - File: example/vulnerable_code/path_traversal.py - > Line 6: image_name = save_2_image_name File: example/vulnerable_code/path_traversal.py > reaches line 20, trigger word "send_file(": ¤call_4 = ret_send_file(foo) @@ -239,6 +239,44 @@ def test_path_traversal_result(self): self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + def test_ensure_saved_scope(self): + vulnerability_log = self.run_analysis('example/vulnerable_code/ensure_saved_scope.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerability_log.vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: example/vulnerable_code/ensure_saved_scope.py + > User input at line 15, trigger word "request.args.get(": + ¤call_1 = ret_request.args.get('image_name') + Reassigned in: + File: example/vulnerable_code/ensure_saved_scope.py + > Line 15: image_name = ¤call_1 + File: example/vulnerable_code/ensure_saved_scope.py + > Line 6: save_2_image_name = image_name + File: example/vulnerable_code/ensure_saved_scope.py + > Line 10: save_3_image_name = image_name + File: example/vulnerable_code/ensure_saved_scope.py + > Line 10: image_name = save_3_image_name + File: example/vulnerable_code/ensure_saved_scope.py + > Line 19: temp_2_other_arg = image_name + File: example/vulnerable_code/ensure_saved_scope.py + > Line 6: other_arg = temp_2_other_arg + File: example/vulnerable_code/ensure_saved_scope.py + > Line 7: outer_ret_val = outer_arg + 'hey' + other_arg + File: example/vulnerable_code/ensure_saved_scope.py + > Line 8: ret_outer = outer_ret_val + File: example/vulnerable_code/ensure_saved_scope.py + > Line 6: image_name = save_2_image_name + File: example/vulnerable_code/ensure_saved_scope.py + > Line 19: ¤call_2 = ret_outer + File: example/vulnerable_code/ensure_saved_scope.py + > Line 19: foo = ¤call_2 + File: example/vulnerable_code/ensure_saved_scope.py + > reaches line 20, trigger word "send_file(": + ¤call_4 = ret_send_file(image_name) + """ + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + def test_path_traversal_sanitised_result(self): vulnerability_log = self.run_analysis('example/vulnerable_code/path_traversal_sanitised.py') self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) @@ -247,7 +285,7 @@ def test_path_traversal_sanitised_result(self): File: example/vulnerable_code/path_traversal_sanitised.py > User input at line 8, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('image_name') - Reassigned in: + Reassigned in: File: example/vulnerable_code/path_traversal_sanitised.py > Line 8: image_name = ¤call_1 File: example/vulnerable_code/path_traversal_sanitised.py @@ -261,7 +299,30 @@ def test_path_traversal_sanitised_result(self): File: example/vulnerable_code/path_traversal_sanitised.py > reaches line 12, trigger word "send_file(": ¤call_3 = ret_send_file(¤call_4) - This vulnerability is potentially sanitised by: Label: ¤call_2 = ret_image_name.replace('..', '') + This vulnerability is sanitised by: Label: ¤call_2 = ret_image_name.replace('..', '') + """ + + self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) + + def test_path_traversal_sanitised_2_result(self): + vulnerability_log = self.run_analysis('example/vulnerable_code/path_traversal_sanitised_2.py') + self.assert_length(vulnerability_log.vulnerabilities, expected_length=1) + vulnerability_description = str(vulnerability_log.vulnerabilities[0]) + EXPECTED_VULNERABILITY_DESCRIPTION = """ + File: example/vulnerable_code/path_traversal_sanitised_2.py + > User input at line 8, trigger word "request.args.get(": + ¤call_1 = ret_request.args.get('image_name') + Reassigned in: + File: example/vulnerable_code/path_traversal_sanitised_2.py + > Line 8: image_name = ¤call_1 + File: example/vulnerable_code/path_traversal_sanitised_2.py + > Line 12: ¤call_3 = ret_os.path.join(¤call_4, image_name) + File: example/vulnerable_code/path_traversal_sanitised_2.py + > Line 12: ret_cat_picture = ¤call_2 + File: example/vulnerable_code/path_traversal_sanitised_2.py + > reaches line 12, trigger word "send_file(": + ¤call_2 = ret_send_file(¤call_3) + This vulnerability is potentially sanitised by: Label: if '..' in image_name: """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -274,7 +335,7 @@ def test_sql_result(self): File: example/vulnerable_code/sql/sqli.py > User input at line 26, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/sql/sqli.py > Line 26: param = ¤call_1 File: example/vulnerable_code/sql/sqli.py @@ -294,7 +355,7 @@ def test_XSS_form_result(self): File: example/vulnerable_code/XSS_form.py > User input at line 14, trigger word "form[": data = request.form['my_text'] - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_form.py > Line 15: ¤call_1 = ret_make_response(¤call_2) File: example/vulnerable_code/XSS_form.py @@ -316,7 +377,7 @@ def test_XSS_url_result(self): File: example/vulnerable_code/XSS_url.py > User input at line 4, trigger word "Framework function URL parameter": url - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_url.py > Line 6: param = url File: example/vulnerable_code/XSS_url.py @@ -344,7 +405,7 @@ def test_XSS_reassign_result(self): File: example/vulnerable_code/XSS_reassign.py > User input at line 6, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_reassign.py > Line 6: param = ¤call_1 File: example/vulnerable_code/XSS_reassign.py @@ -370,7 +431,7 @@ def test_XSS_sanitised_result(self): File: example/vulnerable_code/XSS_sanitised.py > User input at line 7, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_sanitised.py > Line 7: param = ¤call_1 File: example/vulnerable_code/XSS_sanitised.py @@ -386,7 +447,7 @@ def test_XSS_sanitised_result(self): File: example/vulnerable_code/XSS_sanitised.py > reaches line 12, trigger word "replace(": ¤call_5 = ret_html.replace('{{ param }}', param) - This vulnerability is potentially sanitised by: Label: ¤call_2 = ret_Markup.escape(param) + This vulnerability is sanitised by: Label: ¤call_2 = ret_Markup.escape(param) """ self.assertTrue(self.string_compare_alpha(vulnerability_description, EXPECTED_VULNERABILITY_DESCRIPTION)) @@ -403,7 +464,7 @@ def test_XSS_variable_assign_result(self): File: example/vulnerable_code/XSS_variable_assign.py > User input at line 6, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_variable_assign.py > Line 6: param = ¤call_1 File: example/vulnerable_code/XSS_variable_assign.py @@ -429,7 +490,7 @@ def test_XSS_variable_multiple_assign_result(self): File: example/vulnerable_code/XSS_variable_multiple_assign.py > User input at line 6, trigger word "request.args.get(": ¤call_1 = ret_request.args.get('param', 'not set') - Reassigned in: + Reassigned in: File: example/vulnerable_code/XSS_variable_multiple_assign.py > Line 6: param = ¤call_1 File: example/vulnerable_code/XSS_variable_multiple_assign.py From cfbefef87854071ee03432389a8ae7a71aaed387 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 22 Mar 2018 19:07:27 -0700 Subject: [PATCH 13/20] Made node types for If and Try statements statements, moved some LabelVisitor stuff to node_types, comments --- pyt/base_cfg.py | 28 ++++++++---------- pyt/fixed_point.py | 2 +- pyt/interprocedural_cfg.py | 4 +-- pyt/node_types.py | 58 ++++++++++++++++++++++++++++++++------ pyt/vulnerabilities.py | 10 ++++--- 5 files changed, 70 insertions(+), 32 deletions(-) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index 555bc5dc..4f1862b9 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -21,9 +21,11 @@ BBorBInode, BreakNode, ControlFlowNode, + IfNode, IgnoredNode, Node, - RestoreNode + RestoreNode, + TryNode ) from .right_hand_side_visitor import RHSVisitor from .vars_visitor import VarsVisitor @@ -55,7 +57,7 @@ def stmt_star_handler( for stmt in stmts: node = self.visit(stmt) - if isinstance(node, ControlFlowNode) and node.test.label != 'Try': + if isinstance(node, ControlFlowNode) and not isinstance(node.test, TryNode): self.last_control_flow_nodes.append(node.test) else: self.last_control_flow_nodes.append(None) @@ -110,6 +112,7 @@ def handle_or_else(self, orelse, test): """ if isinstance(orelse[0], ast.If): control_flow_node = self.visit(orelse[0]) + # Prefix the if label with 'el' control_flow_node.test.label = 'el' + control_flow_node.test.label test.connect(control_flow_node.test) @@ -123,11 +126,8 @@ def handle_or_else(self, orelse, test): return else_connect_statements.last_statements def visit_If(self, node): - label_visitor = LabelVisitor() - label_visitor.visit(node.test) - - test = self.append_node(Node( - 'if ' + label_visitor.result + ':', + test = self.append_node(IfNode( + node.test, node, line_number=node.lineno, path=self.filenames[-1] @@ -153,11 +153,7 @@ def visit_If(self, node): return ControlFlowNode(test, last_statements, break_statements=body_connect_stmts.break_statements) def visit_Raise(self, node): - label = LabelVisitor() - label.visit(node) - return self.append_node(RaiseNode( - label.result, node, line_number=node.lineno, path=self.filenames[-1] @@ -175,8 +171,7 @@ def handle_stmt_star_ignore_node(self, body, fallback_cfg_node): return body def visit_Try(self, node): - try_node = self.append_node(Node( - 'Try', + try_node = self.append_node(TryNode( node, line_number=node.lineno, path=self.filenames[-1] @@ -243,7 +238,8 @@ def assign_tuple_target(self, node, right_hand_side_variables): extract_left_hand_side(target), ast.Assign(target, value), right_hand_side_variables, - line_number=node.lineno, path=self.filenames[-1] + line_number=node.lineno, + path=self.filenames[-1] ))) connect_nodes(new_assignment_nodes) @@ -263,7 +259,8 @@ def assign_multi_target(self, node, right_hand_side_variables): left_hand_side, ast.Assign(target, node.value), right_hand_side_variables, - line_number=node.lineno, path=self.filenames[-1] + line_number=node.lineno, + path=self.filenames[-1] ))) connect_nodes(new_assignment_nodes) @@ -448,7 +445,6 @@ def add_blackbox_or_builtin_call(self, node, blackbox): Returns: call_node(BBorBInode): The call node. """ - # Increment function_call_index self.function_call_index += 1 saved_function_call_index = self.function_call_index self.undecided = False diff --git a/pyt/fixed_point.py b/pyt/fixed_point.py index 806c172a..b0574934 100644 --- a/pyt/fixed_point.py +++ b/pyt/fixed_point.py @@ -26,7 +26,7 @@ def fixpoint_runner(self): for node in self.analysis.dep(q[0]): # for (v in dep(v_i)) q.append(node) # q.append(v): constraint_table[q[0]] = y # q[0].old_constraint = q[0].new_constraint # x_i = y - q = q[1:] # q = q.tail() # The list minus the head + q = q[1:] # q = q.tail() # The list minus the head def analyse(cfg_list, *, analysis_type): diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index 5ba9c31e..af81e277 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -81,7 +81,7 @@ def init_cfg(self, node): raise Exception('Empty module. It seems that your file is empty,' + 'there is nothing to analyse.') - exit_node = self.append_node(EntryOrExitNode("Exit module")) + exit_node = self.append_node(EntryOrExitNode('Exit module')) if isinstance(module_statements, IgnoredNode): entry_node.connect(exit_node) @@ -104,7 +104,7 @@ def init_function_cfg(self, node, module_definitions): entry_node = self.append_node(EntryOrExitNode('Entry function')) module_statements = self.stmt_star_handler(node.body) - exit_node = self.append_node(EntryOrExitNode("Exit function")) + exit_node = self.append_node(EntryOrExitNode('Exit function')) if isinstance(module_statements, IgnoredNode): entry_node.connect(exit_node) diff --git a/pyt/node_types.py b/pyt/node_types.py index b9b796b8..dbf9ceeb 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -1,6 +1,8 @@ """This module contains all of the CFG nodes types.""" from collections import namedtuple +from .label_visitor import LabelVisitor + ControlFlowNode = namedtuple( 'ControlFlowNode', @@ -58,7 +60,6 @@ def __str__(self): """Print the label of the node.""" return ''.join((' Label: ', self.label)) - def __repr__(self): """Print a representation of the node.""" label = ' '.join(('Label: ', self.label)) @@ -78,19 +79,43 @@ def __repr__(self): return '\n' + '\n'.join((label, line_number, ingoing, outgoing)) -class RaiseNode(Node, ConnectToExitNode): - """CFG Node that represents a Raise statement.""" +class BreakNode(Node): + """CFG Node that represents a Break statement.""" - def __init__(self, label, ast_node, *, line_number, path): - """Create a Raise node.""" - super().__init__(label, ast_node, line_number=line_number, path=path) + def __init__(self, ast_node, *, line_number, path): + super().__init__( + self.__class__.__name__, + ast_node, + line_number=line_number, + path=path + ) -class BreakNode(Node): - """CFG Node that represents a Break node.""" +class IfNode(Node): + """CFG Node that represents an If statement.""" + + def __init__(self, test_node, ast_node, *, line_number, path): + label_visitor = LabelVisitor() + label_visitor.visit(test_node) + + super().__init__( + 'if ' + label_visitor.result + ':', + ast_node, + line_number=line_number, + path=path + ) + + +class TryNode(Node): + """CFG Node that represents a Try statement.""" def __init__(self, ast_node, *, line_number, path): - super().__init__(self.__class__.__name__, ast_node, line_number=line_number, path=path) + super().__init__( + 'try:', + ast_node, + line_number=line_number, + path=path + ) class EntryOrExitNode(Node): @@ -100,6 +125,21 @@ def __init__(self, label): super().__init__(label, None, line_number=None, path=None) +class RaiseNode(Node, ConnectToExitNode): + """CFG Node that represents a Raise statement.""" + + def __init__(self, ast_node, *, line_number, path): + label = LabelVisitor() + label.visit(ast_node) + + super().__init__( + label_visitor.result, + ast_node, + line_number=line_number, + path=path + ) + + class AssignmentNode(Node): """CFG Node that represents an assignment.""" diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 7b726dc3..50306f80 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -11,6 +11,7 @@ AssignmentCallNode, AssignmentNode, BBorBInode, + IfNode, RestoreNode, TaintedNode ) @@ -290,7 +291,7 @@ def get_vulnerability_chains( current_node() sink() def_use(dict): - chain(list(Node)): A path of nodes between source to sink. + chain(list(Node)): A path of nodes between source and sink. """ for use in def_use[current_node]: if use == sink: @@ -321,7 +322,7 @@ def how_vulnerable( e.g. we can only say potentially instead of definitely sanitised in the path_traversal_sanitised_2.py test. Args: - chain(list(Node)): A path of nodes between source to sink. + chain(list(Node)): A path of nodes between source and sink. blackbox_mapping(dict): A map of blackbox functions containing whether or not they propagate taint. sanitiser_nodes(set): A set of nodes that are sanitisers for the sink. potential_sanitiser(Node): An if or elif node that can potentially cause sanitisation. @@ -432,8 +433,9 @@ def get_vulnerability( if sink.sanitisers: for sanitiser in sink.sanitisers: for cfg_node in triggers.sanitiser_dict[sanitiser]: - sanitiser_nodes.add(cfg_node) - if 'if' in cfg_node.label and cfg_node.label.endswith(':'): + if isinstance(cfg_node, AssignmentNode): + sanitiser_nodes.add(cfg_node) + elif isinstance(cfg_node, IfNode): potential_sanitiser = cfg_node def_use = build_def_use_chain(cfg.nodes) From 0dead7c7df1fe3c2321577036a389508da65f9d2 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 22 Mar 2018 19:12:24 -0700 Subject: [PATCH 14/20] Delete intra_cfg, same as in master --- pyt/intraprocedural_cfg.py | 179 ------------------------------------- 1 file changed, 179 deletions(-) delete mode 100644 pyt/intraprocedural_cfg.py diff --git a/pyt/intraprocedural_cfg.py b/pyt/intraprocedural_cfg.py deleted file mode 100644 index 1fe175b7..00000000 --- a/pyt/intraprocedural_cfg.py +++ /dev/null @@ -1,179 +0,0 @@ -import ast - -from .ast_helper import Arguments, generate_ast -from .base_cfg import ( - CALL_IDENTIFIER, - CFG, - EntryOrExitNode, - IgnoredNode, - Node, - ReturnNode, - Visitor -) -from .label_visitor import LabelVisitor -from .right_hand_side_visitor import RHSVisitor - - -class IntraproceduralVisitor(Visitor): - - def __init__(self, node, filename): - """Create an empty CFG.""" - self.nodes = list() - self.undecided = False # Check if needed in intraprocedural - - self.function_names = list() - self.filenames = [filename] - - try: - # FunctionDef ast node - self.init_function_cfg(node) - except: # Error?! - # Module ast node - self.init_module_cfg(node) - - def init_module_cfg(self, node): - entry_node = self.append_node(EntryOrExitNode('Entry module')) - - module_statements = self.visit(node) - - if not module_statements: - raise Exception('Empty module. It seems that your file is empty,' + - ' there is nothing to analyse.') - - if not isinstance(module_statements, IgnoredNode): - first_node = module_statements.first_statement - - if CALL_IDENTIFIER not in first_node.label: - entry_node.connect(first_node) - - exit_node = self.append_node(EntryOrExitNode('Exit module')) - - last_nodes = module_statements.last_statements - exit_node.connect_predecessors(last_nodes) - else: - exit_node = self.append_node(EntryOrExitNode('Exit module')) - entry_node.connect(exit_node) - - def init_function_cfg(self, node): - - entry_node = self.append_node(EntryOrExitNode('Entry module')) - - module_statements = self.stmt_star_handler(node.body) - if isinstance(module_statements, IgnoredNode): - exit_node = self.append_node(EntryOrExitNode('Exit module')) - entry_node.connect(exit_node) - return - - first_node = module_statements.first_statement - if CALL_IDENTIFIER not in first_node.label: - entry_node.connect(first_node) - - exit_node = self.append_node(EntryOrExitNode('Exit module')) - - last_nodes = module_statements.last_statements - exit_node.connect_predecessors(last_nodes) - - def visit_ClassDef(self, node): - return self.append_node(Node('class ' + node.name, node, - line_number=node.lineno, - path=self.filenames[-1])) - - def visit_FunctionDef(self, node): - arguments = Arguments(node.args) - return self.append_node(Node('def ' + node.name + '(' + - ','.join(arguments) + '):', - node, - line_number=node.lineno, - path=self.filenames[-1])) - - def visit_Return(self, node): - label = LabelVisitor() - label.visit(node) - - try: - rhs_visitor = RHSVisitor() - rhs_visitor.visit(node.value) - except AttributeError: - rhs_visitor.result = 'EmptyReturn' - - LHS = 'ret_' + 'MAYBE_FUNCTION_NAME' - return self.append_node(ReturnNode(LHS + ' = ' + label.result, - LHS, - node, - rhs_visitor.result, - line_number=node.lineno, - path=self.filenames[-1])) - - def visit_Yield(self, node): - label = LabelVisitor() - label.visit(node) - - try: - rhs_visitor = RHSVisitor() - rhs_visitor.visit(node.value) - except AttributeError: - rhs_visitor.result = 'EmptyYield' - - LHS = 'yield_' + 'MAYBE_FUNCTION_NAME' - return self.append_node(ReturnNode(LHS + ' = ' + label.result, - LHS, - node, - rhs_visitor.result, - line_number=node.lineno, - path=self.filenames[-1])) - - def visit_Call(self, node): - return self.add_builtin(node) - - def visit_Import(self, node): - names = [n.name for n in node.names] - return self.append_node(Node('Import ' + ', '.join(names), node, - line_number=node.lineno, - path=self.filenames[-1])) - - def visit_ImportFrom(self, node): - names = [a.name for a in node.names] - try: - from_import = 'from ' + node.module + ' ' - except TypeError: - from_import = '' - return self.append_node(Node(from_import + 'import ' + - ', '.join(names), - node, - line_number=node.lineno, - path=self.filenames[-1])) - - -class FunctionDefVisitor(ast.NodeVisitor): - def __init__(self): - self.result = list() - - def visit_FunctionDef(self, node): - self.result.append(node) - #def visit_ClassDef(self, node): - # self.result.append(node) - - -def intraprocedural(project_modules, cfg_list): - functions = list() - dup = list() - for module in project_modules: - t = generate_ast(module[1]) - iv = IntraproceduralVisitor(t, filename=module[1]) - cfg_list.append(CFG(iv.nodes)) - dup.append(t) - fdv = FunctionDefVisitor() - fdv.visit(t) - dup.extend(fdv.result) - functions.extend([(f, module[1]) for f in fdv.result]) - - for f in functions: - iv = IntraproceduralVisitor(f[0], filename=f[1]) - cfg_list.append(CFG(iv.nodes)) - - s = set() - for d in dup: - if d in s: - raise Exception('Duplicates in the functions definitions list.') - else: - s.add(d) From 1d11a0521e3078d86bddeb2b0722a38fc6985855 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Thu, 22 Mar 2018 19:49:10 -0700 Subject: [PATCH 15/20] [DRY] Remove all 'line_number=node.lineno' for node def sites --- pyt/base_cfg.py | 4 ---- pyt/interprocedural_cfg.py | 6 ++---- pyt/node_types.py | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index 4f1862b9..e0441840 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -129,7 +129,6 @@ def visit_If(self, node): test = self.append_node(IfNode( node.test, node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -155,7 +154,6 @@ def visit_If(self, node): def visit_Raise(self, node): return self.append_node(RaiseNode( node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -173,7 +171,6 @@ def handle_stmt_star_ignore_node(self, body, fallback_cfg_node): def visit_Try(self, node): try_node = self.append_node(TryNode( node, - line_number=node.lineno, path=self.filenames[-1] )) body = self.stmt_star_handler(node.body) @@ -548,7 +545,6 @@ def visit_With(self, node): def visit_Break(self, node): return self.append_node(BreakNode( node, - line_number=node.lineno, path=self.filenames[-1] )) diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index af81e277..af7830a3 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -198,7 +198,6 @@ def visit_Return(self, node): LHS, node, [return_value_of_call.left_hand_side], - line_number=node.lineno, path=self.filenames[-1] ) return_value_of_call.connect(return_node) @@ -210,7 +209,6 @@ def visit_Return(self, node): LHS, node, rhs_visitor.result, - line_number=node.lineno, path=self.filenames[-1] )) @@ -231,7 +229,6 @@ def visit_Yield(self, node): LHS, node, rhs_visitor.result, - line_number=node.lineno, path=self.filenames[-1]) ) @@ -266,7 +263,8 @@ def save_local_scope(self, line_number, saved_function_call_index): save_name + ' = ' + assignment.left_hand_side, save_name, [assignment.left_hand_side], - line_number=line_number, path=self.filenames[-1] + line_number=line_number, + path=self.filenames[-1] ) if not first_node: first_node = saved_scope_node diff --git a/pyt/node_types.py b/pyt/node_types.py index dbf9ceeb..bdbe59af 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -27,7 +27,7 @@ class Node(): """A Control Flow Graph node that contains a list of ingoing and outgoing nodes and a list of its variables.""" - def __init__(self, label, ast_node, *, line_number, path): + def __init__(self, label, ast_node, *, line_number=None, path): """Create a Node that can be used in a CFG. Args: @@ -36,7 +36,12 @@ def __init__(self, label, ast_node, *, line_number, path): """ self.label = label self.ast_node = ast_node - self.line_number = line_number + if line_number: + self.line_number = line_number + elif ast_node: + self.line_number = ast_node.lineno + else: + self.line_number = None self.path = path self.ingoing = list() self.outgoing = list() @@ -82,11 +87,10 @@ def __repr__(self): class BreakNode(Node): """CFG Node that represents a Break statement.""" - def __init__(self, ast_node, *, line_number, path): + def __init__(self, ast_node, *, path): super().__init__( self.__class__.__name__, ast_node, - line_number=line_number, path=path ) @@ -94,14 +98,13 @@ def __init__(self, ast_node, *, line_number, path): class IfNode(Node): """CFG Node that represents an If statement.""" - def __init__(self, test_node, ast_node, *, line_number, path): + def __init__(self, test_node, ast_node, *, path): label_visitor = LabelVisitor() label_visitor.visit(test_node) super().__init__( 'if ' + label_visitor.result + ':', ast_node, - line_number=line_number, path=path ) @@ -109,11 +112,10 @@ def __init__(self, test_node, ast_node, *, line_number, path): class TryNode(Node): """CFG Node that represents a Try statement.""" - def __init__(self, ast_node, *, line_number, path): + def __init__(self, ast_node, *, path): super().__init__( 'try:', ast_node, - line_number=line_number, path=path ) @@ -253,7 +255,6 @@ def __init__( ast_node, right_hand_side_variables, *, - line_number, path ): """Create a return from a call node. @@ -263,7 +264,6 @@ def __init__( left_hand_side(str): The variable on the left hand side of the assignment. Used for analysis. ast_node right_hand_side_variables(list[str]): A list of variables on the right hand side. - line_number(Optional[int]): The line of the expression the Node represents. path(string): Current filename. """ super().__init__( @@ -271,6 +271,6 @@ def __init__( left_hand_side, ast_node, right_hand_side_variables, - line_number=line_number, + line_number=ast_node.lineno, path=path ) From 6bb2a68da24a2504f6ea147ff6efe5273017dbb9 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 23 Mar 2018 16:55:57 -0700 Subject: [PATCH 16/20] [DRY] Remove more 'line_number=node.lineno' from node def sites --- pyt/base_cfg.py | 9 --------- pyt/node_types.py | 2 +- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/pyt/base_cfg.py b/pyt/base_cfg.py index e0441840..76d65d62 100644 --- a/pyt/base_cfg.py +++ b/pyt/base_cfg.py @@ -287,7 +287,6 @@ def visit_Assign(self, node): label.result, node, rhs_visitor.result, - line_number=node.lineno, path=self.filenames[-1] )) @@ -306,7 +305,6 @@ def visit_Assign(self, node): extract_left_hand_side(node.targets[0]), node, rhs_visitor.result, - line_number=node.lineno, path=self.filenames[-1] )) @@ -353,7 +351,6 @@ def visit_AugAssign(self, node): extract_left_hand_side(node.target), node, rhs_visitor.result, - line_number=node.lineno, path=self.filenames[-1] )) @@ -398,7 +395,6 @@ def visit_For(self, node): for_node = self.append_node(Node( "for " + target_label.result + " in " + iterator_label.result + ':', node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -415,7 +411,6 @@ def visit_While(self, node): test = self.append_node(Node( 'while ' + label_visitor.result + ':', node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -531,7 +526,6 @@ def visit_With(self, node): with_node = self.append_node(Node( label_visitor.result, node, - line_number=node.lineno, path=self.filenames[-1] )) connect_statements = self.stmt_star_handler(node.body) @@ -555,7 +549,6 @@ def visit_Delete(self, node): return self.append_node(Node( 'del ' + labelVisitor.result, node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -566,7 +559,6 @@ def visit_Assert(self, node): return self.append_node(Node( label_visitor.result, node, - line_number=node.lineno, path=self.filenames[-1] )) @@ -627,7 +619,6 @@ def visit_miscelleaneous_node( return self.append_node(Node( label, node, - line_number=node.lineno, path=self.filenames[-1] )) diff --git a/pyt/node_types.py b/pyt/node_types.py index bdbe59af..45d15159 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -145,7 +145,7 @@ def __init__(self, ast_node, *, line_number, path): class AssignmentNode(Node): """CFG Node that represents an assignment.""" - def __init__(self, label, left_hand_side, ast_node, right_hand_side_variables, *, line_number, path): + def __init__(self, label, left_hand_side, ast_node, right_hand_side_variables, *, line_number=None, path): """Create an Assignment node. Args: From 3df4c288d82b308bf4d9cd62a8ff3c3e6ba34e26 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Fri, 23 Mar 2018 17:20:22 -0700 Subject: [PATCH 17/20] Added a comment to ConnectToExitNode --- pyt/interprocedural_cfg.py | 72 ++++++++++++++++++++++++-------------- pyt/node_types.py | 1 + 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/pyt/interprocedural_cfg.py b/pyt/interprocedural_cfg.py index af7830a3..c9d8c21a 100644 --- a/pyt/interprocedural_cfg.py +++ b/pyt/interprocedural_cfg.py @@ -232,7 +232,39 @@ def visit_Yield(self, node): path=self.filenames[-1]) ) - def save_local_scope(self, line_number, saved_function_call_index): + def connect_if_allowed( + self, + previous_node, + node_to_connect_to + ): + # e.g. + # while x != 10: + # if x > 0: + # print(x) + # break + # else: + # print('hest') + # print('next') # self.nodes[-1] is print('hest') + # + # So we connect to `while x!= 10` instead + if self.last_control_flow_nodes[-1]: + self.last_control_flow_nodes[-1].connect(node_to_connect_to) + self.last_control_flow_nodes[-1] = None + return + + # Except in this case: + # + # if not image_name: + # return 404 + # print('foo') # We do not want to connect this line with `return 404` + if previous_node is not self.prev_nodes_to_avoid[-1] and not isinstance(previous_node, ReturnNode): + previous_node.connect(node_to_connect_to) + + def save_local_scope( + self, + line_number, + saved_function_call_index + ): """Save the local scope before entering a function call by saving all the LHS's of assignments so far. Args: @@ -277,30 +309,6 @@ def save_local_scope(self, line_number, saved_function_call_index): return (saved_variables, first_node) - def connect_if_allowed(self, previous_node, node_to_connect_to): - # e.g. - # while x != 10: - # if x > 0: - # print(x) - # break - # else: - # print('hest') - # print('next') # self.nodes[-1] is print('hest') - # - # So we connect to `while x!= 10` instead - if self.last_control_flow_nodes[-1]: - self.last_control_flow_nodes[-1].connect(node_to_connect_to) - self.last_control_flow_nodes[-1] = None - return - - # Except in this case: - # - # if not image_name: - # return 404 - # print('foo') # We do not want to connect this line with `return 404` - if previous_node is not self.prev_nodes_to_avoid[-1] and not isinstance(previous_node, ReturnNode): - previous_node.connect(node_to_connect_to) - def save_def_args_in_temp( self, call_args, @@ -418,7 +426,11 @@ def create_local_scope_from_def_args( self.nodes[-1].connect(local_scope_node) self.nodes.append(local_scope_node) - def visit_and_get_function_nodes(self, definition, first_node): + def visit_and_get_function_nodes( + self, + definition, + first_node + ): """Visits the nodes of a user defined function. Args: @@ -497,7 +509,13 @@ def restore_saved_local_scope( return restore_nodes - def return_handler(self, call_node, function_nodes, saved_function_call_index, first_node): + def return_handler( + self, + call_node, + function_nodes, + saved_function_call_index, + first_node + ): """Handle the return from a function during a function call. Args: diff --git a/pyt/node_types.py b/pyt/node_types.py index 45d15159..edd778e6 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -20,6 +20,7 @@ class IgnoredNode(): class ConnectToExitNode(): + """A common type between raise's and return's, used in return_handler""" pass From 8ee2e8ae6ed328a44d3be918de1e7d25f860bd5f Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 26 Mar 2018 10:42:58 -0700 Subject: [PATCH 18/20] Add period at the end of docstring comment --- pyt/node_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/node_types.py b/pyt/node_types.py index edd778e6..400c7e55 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -20,7 +20,7 @@ class IgnoredNode(): class ConnectToExitNode(): - """A common type between raise's and return's, used in return_handler""" + """A common type between raise's and return's, used in return_handler.""" pass From d70133dc5a653260e2a926013b1d309839b65590 Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 26 Mar 2018 18:56:04 -0700 Subject: [PATCH 19/20] [refactor] namedtuples changed field lists to tuples --- pyt/argument_helpers.py | 4 ++-- pyt/base_cfg_helper.py | 4 ++-- pyt/interprocedural_cfg_helper.py | 8 +++++++- pyt/node_types.py | 6 +++--- pyt/trigger_definitions_parser.py | 4 ++-- pyt/vulnerabilities.py | 8 ++++---- tests/analysis_base_test_case.py | 8 +++++++- tests/reaching_definitions_taint_test.py | 2 -- 8 files changed, 27 insertions(+), 17 deletions(-) diff --git a/pyt/argument_helpers.py b/pyt/argument_helpers.py index 05b4542c..a2ecee35 100644 --- a/pyt/argument_helpers.py +++ b/pyt/argument_helpers.py @@ -36,8 +36,8 @@ class UImode(Enum): VulnerabilityFiles = namedtuple( 'VulnerabilityFiles', - [ + ( 'blackbox_mapping', 'triggers' - ] + ) ) diff --git a/pyt/base_cfg_helper.py b/pyt/base_cfg_helper.py index e4a1784d..beac36e3 100644 --- a/pyt/base_cfg_helper.py +++ b/pyt/base_cfg_helper.py @@ -13,11 +13,11 @@ CALL_IDENTIFIER = '¤' ConnectStatements = namedtuple( 'ConnectStatements', - [ + ( 'first_statement', 'last_statements', 'break_statements' - ] + ) ) diff --git a/pyt/interprocedural_cfg_helper.py b/pyt/interprocedural_cfg_helper.py index d8ee6c7f..2a6b0ab2 100644 --- a/pyt/interprocedural_cfg_helper.py +++ b/pyt/interprocedural_cfg_helper.py @@ -4,7 +4,13 @@ ConnectToExitNode ) -SavedVariable = namedtuple('SavedVariable', 'LHS RHS') +SavedVariable = namedtuple( + 'SavedVariable', + ( + 'LHS', + 'RHS' + ) +) BUILTINS = ( 'get', 'Flask', diff --git a/pyt/node_types.py b/pyt/node_types.py index 400c7e55..effa63f2 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -6,11 +6,11 @@ ControlFlowNode = namedtuple( 'ControlFlowNode', - [ + ( 'test', 'last_nodes', 'break_statements' - ] + ) ) @@ -20,7 +20,7 @@ class IgnoredNode(): class ConnectToExitNode(): - """A common type between raise's and return's, used in return_handler.""" + """A common type between raise's and return's, used in return_handler""" pass diff --git a/pyt/trigger_definitions_parser.py b/pyt/trigger_definitions_parser.py index 248be29a..7515da4a 100644 --- a/pyt/trigger_definitions_parser.py +++ b/pyt/trigger_definitions_parser.py @@ -8,10 +8,10 @@ Definitions = namedtuple( 'Definitions', - [ + ( 'sources', 'sinks' - ] + ) ) diff --git a/pyt/vulnerabilities.py b/pyt/vulnerabilities.py index 50306f80..2ad8a2ce 100644 --- a/pyt/vulnerabilities.py +++ b/pyt/vulnerabilities.py @@ -27,18 +27,18 @@ Sanitiser = namedtuple( 'Sanitiser', - [ + ( 'trigger_word', 'cfg_node' - ] + ) ) Triggers = namedtuple( 'Triggers', - [ + ( 'sources', 'sinks', 'sanitiser_dict' - ] + ) ) diff --git a/tests/analysis_base_test_case.py b/tests/analysis_base_test_case.py index 6f07a9f2..cdd33182 100644 --- a/tests/analysis_base_test_case.py +++ b/tests/analysis_base_test_case.py @@ -8,7 +8,13 @@ class AnalysisBaseTestCase(BaseTestCase): - connection = namedtuple('connection', 'constraintset element') + connection = namedtuple( + 'connection', + ( + 'constraintset', + 'element' + ) + ) def setUp(self): self.cfg = None diff --git a/tests/reaching_definitions_taint_test.py b/tests/reaching_definitions_taint_test.py index 39158e20..cb5d3d7e 100644 --- a/tests/reaching_definitions_taint_test.py +++ b/tests/reaching_definitions_taint_test.py @@ -1,5 +1,3 @@ -from collections import namedtuple, OrderedDict - from .analysis_base_test_case import AnalysisBaseTestCase from pyt.constraint_table import constraint_table from pyt.reaching_definitions_taint import ReachingDefinitionsTaintAnalysis From 4447c75ec2462f5e27da872c7a12cb8452d7bbeb Mon Sep 17 00:00:00 2001 From: Kevin Hock Date: Mon, 26 Mar 2018 18:59:22 -0700 Subject: [PATCH 20/20] re-Add period at the end of docstring comment --- pyt/node_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyt/node_types.py b/pyt/node_types.py index effa63f2..cc2e0384 100644 --- a/pyt/node_types.py +++ b/pyt/node_types.py @@ -20,7 +20,7 @@ class IgnoredNode(): class ConnectToExitNode(): - """A common type between raise's and return's, used in return_handler""" + """A common type between raise's and return's, used in return_handler.""" pass