From c61ef925d270568cfc4b7450e78484bcc2253346 Mon Sep 17 00:00:00 2001
From: Jamie Davis
Date: Mon, 23 Apr 2018 18:32:14 -0400
Subject: [PATCH] detect-vuln: test on expanded pattern space

Problems:
1. Detectors miss /a+$/
2. Detectors choke on /a{1,100}a{1,100}a{1,100}$/

Solutions:
1. Prefix with '^(.*?)'
   Credit: Idea from Cox and discussion with Christy
2. Convert a{1,100} to a+, etc.
   Credit: I think chalker suggested something like this
---
Note: the indentation and the blank context lines of this patch were
reconstructed after its whitespace had been collapsed. If a hunk is
rejected, apply with `git apply --ignore-whitespace` or fix up the context
by hand. The blob ids on the index line are those of the original patch.

 src/detect/detect-vuln.pl | 160 ++++++++++++++++++++++++++++----------
 1 file changed, 121 insertions(+), 39 deletions(-)

diff --git a/src/detect/detect-vuln.pl b/src/detect/detect-vuln.pl
index f588f06b..7205526b 100755
--- a/src/detect/detect-vuln.pl
+++ b/src/detect/detect-vuln.pl
@@ -57,48 +57,65 @@ my $limitTime = (defined $query->{timeLimit}) ? "timeout $query->{timeLimit}s" : "";
 my $ulimitMemory = (defined $query->{memoryLimit}) ? "ulimit -m $memoryLimitInBytes; ulimit -v $memoryLimitInBytes;" : "";
 
 
-# Run each detector. Can re-use the input file.
+my @patternsToTry = &expandPatternSpaceForDetectors($query->{pattern});
+
+# This will contain N_DETECTORS * scalar(@patternsToTry) opinions.
 my @detectorOpinions;
-&log("Applying detectors to pattern /$query->{pattern}/");
-for my $d (@DETECTORS) {
-  &log("Querying detector $d->{name}");
-  my $t0 = [gettimeofday];
-  my $stderrFile = "/tmp/detect-vuln-$$-stderr";
-  my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $patternFile 2>$stderrFile");
-  my $elapsed = tv_interval($t0);
-  chomp $out;
-
-  # Clean up in case there was a timeout.
-  my $stderr = &readFile("file"=>$stderrFile);
-  my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
-  &log("Cleaning up @filesToClean");
-  unlink @filesToClean;
-  unlink $stderrFile;
-
-  my $opinion = { "name" => $d->{name},
-                  "secToDecide" => sprintf("%.4f", $elapsed),
-                };
-
-  if ($rc eq 124) {
-    &log("Detector $d->{name} timed out");
-    $opinion->{hasOpinion} = 0;
-    $opinion->{opinion} = "TIMEOUT";
-  }
-  elsif ($rc) {
-    &log("Detector $d->{name} said rc $rc");
-    $opinion->{hasOpinion} = 0;
-    $opinion->{opinion} = "INTERNAL-ERROR";
-  }
-  else {
-    &log("Detector $d->{name} said: $out");
-    my $result = decode_json($out);
-    # Extract the details needed to make the summary.
-    # Otherwise we repeat ourselves too much.
-    $opinion->{hasOpinion} = 1;
-    $opinion->{opinion} = $result->{opinion};
+# Try each pattern.
+for my $pattern (@patternsToTry) {
+  &log("Applying detectors to pattern /$pattern/");
+
+  # Craft query file.
+  my $newQuery = decode_json(encode_json($query));
+  $newQuery->{pattern} = $pattern;
+  my $tmpPatternFile = &makeQueryFile($newQuery);
+
+  # Ask each detector.
+  for my $d (@DETECTORS) {
+    &log("Querying detector $d->{name}");
+    my $t0 = [gettimeofday];
+    my $stderrFile = "/tmp/detect-vuln-$$-stderr";
+    my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $tmpPatternFile 2>$stderrFile");
+    my $elapsed = tv_interval($t0);
+    chomp $out;
+
+    # Clean up in case there was a timeout.
+    my $stderr = &readFile("file"=>$stderrFile);
+    my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
+    &log("Cleaning up @filesToClean");
+    unlink @filesToClean;
+    unlink $stderrFile;
+
+    my $opinion = { "name" => $d->{name},
+                    "secToDecide" => sprintf("%.4f", $elapsed),
+                  };
+
+    if ($rc eq 124) {
+      &log("Detector $d->{name} timed out");
+      $opinion->{hasOpinion} = 0;
+      $opinion->{opinion} = "TIMEOUT";
+    }
+    elsif ($rc) {
+      &log("Detector $d->{name} said rc $rc");
+      $opinion->{hasOpinion} = 0;
+      $opinion->{opinion} = "INTERNAL-ERROR";
+    }
+    else {
+      &log("Detector $d->{name} said: $out");
+      my $result = decode_json($out);
+      # Extract the details needed to make the summary.
+      # Otherwise we repeat ourselves too much.
+      $opinion->{hasOpinion} = 1;
+      $opinion->{opinion} = $result->{opinion};
+
+      # Note the pattern we queried about, so we can distinguish from the original.
+      $opinion->{patternVariant} = $pattern;
+    }
+
+    push @detectorOpinions, $opinion;
   }
 
-  push @detectorOpinions, $opinion;
+  unlink $tmpPatternFile;
 }
 
 $query->{detectorOpinions} = \@detectorOpinions;
@@ -149,6 +166,17 @@ sub getDetectors {
   return @detectors;
 }
 
+# Serialize a query as JSON into a temporary file for a detector to read.
+#
+# input: ($query)
+# output: $tmpFile (named after our PID, so each call overwrites the last; the caller unlinks it)
+sub makeQueryFile {
+  my ($query) = @_;
+  my $tmpFile = "/tmp/detect-vuln-$$.json";
+  &writeToFile("file"=>$tmpFile, "contents"=>encode_json($query));
+  return $tmpFile;
+}
+
 # input: (\@list, $e)
 # output: true if $e is in @list, else false
 sub listContains {
@@ -173,3 +201,57 @@ sub readFile {
 
   return $contents;
 }
+
+# Write (truncating) the given contents to the given file.
+#
+# input: %args: keys: file contents
+# output: $file
+# Dies if the file cannot be opened, written, or closed.
+sub writeToFile {
+  my %args = @_;
+
+  open(my $fh, '>', $args{file}) or die "writeToFile: cannot open $args{file}: $!";
+  print {$fh} $args{contents} or die "writeToFile: cannot write $args{file}: $!";
+  close($fh) or die "writeToFile: cannot close $args{file}: $!";
+
+  return $args{file};
+}
+
+# Build the list of pattern variants to hand to the detectors.
+#
+# input: ($pattern)
+# output: @patternsToTry: $pattern first, then any variants derived from it
+sub expandPatternSpaceForDetectors {
+  my ($pattern) = @_;
+
+  my @patternsToTry = ($pattern);
+
+  # If pattern is unanchored, a backtracking regex engine will run the loop:
+  #   for (1 .. n):
+  #     _match(regex, substr)
+  # This means that if each match is linear-time, the worst-case behavior is quadratic.
+  # For example, /a+$/ is quadratic in Node.js.
+  # The detectors don't seem to acknowledge this loop.
+  # We can simulate it by prefixing un-anchored regexes with '^(.*?)'.
+  # This is also how a linear-time engine scans all starting indices in parallel; see Cox's writings.
+  if (substr($pattern, 0, 1) ne "^") {
+    my $anchoredPattern = "^(.*?)$pattern";
+    push @patternsToTry, $anchoredPattern;
+  }
+
+  # If pattern contains curlies "{\d*,\d*}", the detectors may time out due to graph expansion.
+  # We can try a more general pattern with "*" and "+" instead.
+  # The detectors might give false positives but that's OK, that's what the validate stage is for.
+  # I'm not being careful about escaped curly braces, so let's hope there are no meta-regexes here.
+  my $genericCurlies = $pattern;
+  # {0, and {, both mean "0 or more"
+  $genericCurlies =~ s/\{0,\d*\}/*/g;
+  $genericCurlies =~ s/\{,\d*\}/*/g;
+  # {N, with N >= 1 is generalized to "1 or more"
+  $genericCurlies =~ s/\{[1-9]\d*,\d*\}/+/g;
+  if ($genericCurlies ne $pattern) {
+    push @patternsToTry, $genericCurlies;
+  }
+
+  return @patternsToTry;
+}