Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

detect-vuln: test on expanded pattern space #49

Merged
merged 1 commit into from
Apr 23, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 110 additions & 39 deletions src/detect/detect-vuln.pl
Original file line number Diff line number Diff line change
Expand Up @@ -57,48 +57,65 @@
my $limitTime = (defined $query->{timeLimit}) ? "timeout $query->{timeLimit}s" : "";
my $ulimitMemory = (defined $query->{memoryLimit}) ? "ulimit -m $memoryLimitInBytes; ulimit -v $memoryLimitInBytes;" : "";

# Run each detector. Can re-use the input file.
my @patternsToTry = &expandPatternSpaceForDetectors($query->{pattern});

# This will contain N_DETECTORS * scalar(@patternsToTry) opinions.
my @detectorOpinions;
&log("Applying detectors to pattern /$query->{pattern}/");
for my $d (@DETECTORS) {
&log("Querying detector $d->{name}");
my $t0 = [gettimeofday];
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $patternFile 2>$stderrFile");
my $elapsed = tv_interval($t0);
chomp $out;

# Clean up in case there was a timeout.
my $stderr = &readFile("file"=>$stderrFile);
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
&log("Cleaning up @filesToClean");
unlink @filesToClean;
unlink $stderrFile;

my $opinion = { "name" => $d->{name},
"secToDecide" => sprintf("%.4f", $elapsed),
};

if ($rc eq 124) {
&log("Detector $d->{name} timed out");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "TIMEOUT";
}
elsif ($rc) {
&log("Detector $d->{name} said rc $rc");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "INTERNAL-ERROR";
}
else {
&log("Detector $d->{name} said: $out");
my $result = decode_json($out);
# Extract the details needed to make the summary.
# Otherwise we repeat ourselves too much.
$opinion->{hasOpinion} = 1;
$opinion->{opinion} = $result->{opinion};
# Try each pattern.
for my $pattern (@patternsToTry) {
&log("Applying detectors to pattern /$pattern/");

# Craft query file.
my $newQuery = decode_json(encode_json($query));
$newQuery->{pattern} = $pattern;
my $tmpPatternFile = &makeQueryFile($newQuery);

# Ask each detector.
for my $d (@DETECTORS) {
&log("Querying detector $d->{name}");
my $t0 = [gettimeofday];
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $tmpPatternFile 2>$stderrFile");
my $elapsed = tv_interval($t0);
chomp $out;

# Clean up in case there was a timeout.
my $stderr = &readFile("file"=>$stderrFile);
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
&log("Cleaning up @filesToClean");
unlink @filesToClean;
unlink $stderrFile;

my $opinion = { "name" => $d->{name},
"secToDecide" => sprintf("%.4f", $elapsed),
};

if ($rc eq 124) {
&log("Detector $d->{name} timed out");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "TIMEOUT";
}
elsif ($rc) {
&log("Detector $d->{name} said rc $rc");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "INTERNAL-ERROR";
}
else {
&log("Detector $d->{name} said: $out");
my $result = decode_json($out);
# Extract the details needed to make the summary.
# Otherwise we repeat ourselves too much.
$opinion->{hasOpinion} = 1;
$opinion->{opinion} = $result->{opinion};

# Note the pattern we queried about, so we can distinguish from the original.
$opinion->{patternVariant} = $pattern;
}

push @detectorOpinions, $opinion;
}

push @detectorOpinions, $opinion;
unlink $tmpPatternFile;
}

$query->{detectorOpinions} = \@detectorOpinions;
Expand Down Expand Up @@ -149,6 +166,13 @@ sub getDetectors {
return @detectors;
}

# input: ($query) -- query hashref to serialize
# output: $tmpFile -- path of the file now holding the JSON-encoded query
sub makeQueryFile {
  my ($query) = @_;

  # Serialize first, then write to a per-process path (keyed on our PID).
  my $serializedQuery = encode_json($query);
  my $queryPath = "/tmp/detect-vuln-$$.json";

  &writeToFile("contents"=>$serializedQuery, "file"=>$queryPath);

  return $queryPath;
}

# input: (\@list, $e)
# output: true if $e is in @list, else false
sub listContains {
Expand All @@ -173,3 +197,50 @@ sub readFile {

return $contents;
}

# input: %args: keys: file contents
# output: $file
#
# Overwrites $args{file} with $args{contents}.
# Dies (with the OS error) if the file cannot be opened, written, or closed,
# so that callers never hand a missing or truncated file to a detector.
sub writeToFile {
  my %args = @_;

  open(my $fh, '>', $args{file})
    or die "Error, could not open $args{file} for writing: $!\n";
  print $fh $args{contents}
    or die "Error, could not write to $args{file}: $!\n";
  # Buffered write errors only surface at close, so check it too.
  close($fh)
    or die "Error, could not close $args{file}: $!\n";

  return $args{file};
}

# input: ($pattern)
# output: @patternsToTry -- $pattern itself first, followed by any variants
#         worth asking the detectors about
sub expandPatternSpaceForDetectors {
  my ($pattern) = @_;

  my @patternsToTry = ($pattern);

  # If pattern is unanchored, a backtracking regex engine will run the loop:
  #   for (1 .. n):
  #     _match(regex, substr)
  # This means that if each match is linear-time, the worst-case behavior is quadratic.
  # For example, /a+$/ is quadratic in Node.js.
  # The detectors don't seem to acknowledge this loop.
  # We can simulate it by prefixing un-anchored regexes with '^(.*?)'.
  # This is also how a linear-time engine scans all starting indices in parallel; see Cox's writings.
  #
  # Work from our own argument rather than any file-level query state,
  # so the result depends only on what the caller passed in.
  if (substr($pattern, 0, 1) ne "^") {
    my $anchoredPattern = "^(.*?)$pattern";
    push @patternsToTry, $anchoredPattern;
  }

  # If pattern contains curlies "{\d*,\d*}", the detectors may time out due to graph expansion.
  # We can try a more general pattern with "*" and "+" instead.
  # The detectors might give false positives but that's OK, that's what the validate stage is for.
  # I'm not being careful about escaped curly braces, so let's hope there are no meta-regexes here.
  # The braces in our own regexes are escaped so they are always read as literals.
  my $genericCurlies = $pattern;
  # {0, and {, both mean "0 or more"
  $genericCurlies =~ s/\{0,\d*\}/*/g;
  $genericCurlies =~ s/\{,\d*\}/*/g;
  # {[1-9] means "1 or more"
  $genericCurlies =~ s/\{[1-9]\d*,\d*\}/+/g;
  if ($genericCurlies ne $pattern) {
    push @patternsToTry, $genericCurlies;
  }

  return @patternsToTry;
}