Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[STAL-2713] Various taint analysis graph fixes #521

Merged
merged 5 commits into from
Oct 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -282,11 +282,13 @@ export class MethodFlow {
/**
* Visits an `array_creation_expression`.
* ```java
* var test = new String[]{"hello", someVar};
* // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* var example_01 = new String[]{"hello", someVar};
* // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* var example_02 = new byte[1024];
* // ^^^^^^^^^^^^^^
* ```
* ```
* (type: (_) dimensions: (dimensions) value: (array_initializer))
* (type: (_) dimensions: (dimensions) <value: (array_initializer)>?)
* ```
* @param {TreeSitterNode} node
*/
Expand All @@ -295,9 +297,11 @@ export class MethodFlow {
const children = ddsa.getChildren(node);

const valueIdx = findFieldIndex(children, 2, "value");
const value = children[valueIdx];
this.visit(value);
this.propagateLastTaint(node);
if (valueIdx !== -1) {
const value = children[valueIdx];
this.visit(value);
this.propagateLastTaint(node);
}
}

/**
Expand Down Expand Up @@ -554,17 +558,20 @@ export class MethodFlow {
visitMethodCall(node) {
const children = ddsa.getChildren(node);
const objIdx = findFieldIndex(children, 0, "object");
// [(identifier) (field_access)]
// `[(identifier) (field_access)]`
const obj = children[objIdx];
if (obj?.cstType === "identifier") {
// [simplification]: If the node could represent a local variable, propagate taint as if that local variable
// always taints the return value of an instance method (this is clearly not always the case).
this.visitIdentifier(obj);
this.propagateLastTaint(node);
const _ = this.takeLastTainted();
}

// [simplification]: Ignore the "name" field (we don't do name or type resolution).
// (We don't blanket visit the `obj` because of how we selectively visit `identifier` to track variable references).
if (obj !== undefined) {
switch (obj.cstType) {
case "method_invocation": {
this.visitMethodCall(obj);
this.propagateLastTaint(node);
break;
}
}
}

const argsIdx = findFieldIndex(children, objIdx + 1, "arguments");
const args = children[argsIdx];
Expand All @@ -573,6 +580,13 @@ export class MethodFlow {
// [simplification]: Propagate tainted arguments as if they _always_ flow through into the return value
// of the method (this is clearly not always the case).
this.propagateLastTaint(node);

if (obj?.cstType === "identifier") {
// [simplification]: If the node could represent a local variable, propagate taint as if that local variable
// always taints the return value of an instance method (this is clearly not always the case).
this.visitIdentifier(obj);
this.propagateLastTaint(node);
}
}

/**
Expand Down Expand Up @@ -601,10 +615,17 @@ export class MethodFlow {
* ```
* (object_creation_expression (object_creation_expression)? type: (_) arguments: (argument_list))
* ```
* @param {TreeSitterNode} _node
* @param {TreeSitterNode} node
*/
visitObjCreationExpr(_node) {
// [simplification]: Ignore this node
visitObjCreationExpr(node) {
// [simplification]: Propagate arguments as if they _always_ flow through into the return value
// of the constructor.
const children = ddsa.getChildren(node);

const argumentsIdx = findFieldIndex(children, 1, "arguments");
const args = children[argumentsIdx];
this.visitArgList(args);
this.propagateLastTaint(node);
}

/**
Expand Down Expand Up @@ -868,6 +889,7 @@ export class MethodFlow {

// The index of the first "update" child field detected.
let updateFieldIdx = -1;
let bodyFieldIdx = -1;

const len = children.length;
for (let i = 0; i < len; i++) {
Expand All @@ -891,6 +913,7 @@ export class MethodFlow {
}
break;
case "body":
bodyFieldIdx = i;
this.visitBlockStmt(child);
break;
default:
Expand All @@ -899,7 +922,7 @@ export class MethodFlow {
}

if (updateFieldIdx !== -1) {
for (let i = updateFieldIdx; i < len; i++) {
for (let i = updateFieldIdx; i < bodyFieldIdx; i++) {
const child = children[i];
this.visit(child);
// TODO(JF): After scoped variable support: propagate taint here
Expand Down
131 changes: 107 additions & 24 deletions crates/static-analysis-kernel/src/analysis/ddsa_lib/js/flow/java.rs
Original file line number Diff line number Diff line change
Expand Up @@ -505,9 +505,9 @@ strict digraph {
}

/// array_creation_expression
/// array_initializer
#[test]
fn array_creation_expression_array_initializer() {
fn array_creation_expression() {
// With array_initializer
assert_digraph!(
// language=java
"\
Expand All @@ -527,6 +527,19 @@ strict digraph full {
arrayInit -> var_A [kind=dependence]
arrayInit -> var_B [kind=dependence]
}
"#
);
// Without array_initializer
assert_digraph!(
// language=java
"\
void method() {
new String[123];
}
",
// language=dot
r#"
strict digraph full {}
"#
);
}
Expand Down Expand Up @@ -638,6 +651,66 @@ strict digraph full {
);
}

/// Chained method invocations are parsed, and taint is propagated from each receiver
/// (the call an invocation is chained onto) to that invocation.
///
/// For `echo(x).golf(y).foxtrot(z)` the expected graph contains three `method_invocation`
/// nodes. Each one depends on its own `argument_list`, and every invocation after the
/// first additionally depends on the invocation that precedes it in the chain.
#[test]
fn method_invocation_chained() {
assert_digraph!(
// Input: a single expression statement made of three chained calls.
// language=java
"\
void method() {
echo(x).golf(y).foxtrot(z);
}
",
// Expected graph. `A -> B [kind=dependence]` is read here as "A depends on B"
// (NOTE(review): edge semantics are defined by `assert_digraph!`, which is not
// visible in this chunk -- confirm).
// language=dot
r#"
strict digraph full {
x; y; z
argList0 [text="(x)",cstkind=argument_list]
methodInvo0 [text="echo(x)",cstkind=method_invocation]
argList1 [text="(y)",cstkind=argument_list]
methodInvo1 [text="echo(x).golf(y)",cstkind=method_invocation]
argList2 [text="(z)",cstkind=argument_list]
methodInvo2 [text="echo(x).golf(y).foxtrot(z)",cstkind=method_invocation]

argList0 -> x [kind=dependence]
methodInvo0 -> argList0 [kind=dependence]
argList1 -> y [kind=dependence]
methodInvo1 -> argList1 [kind=dependence]
methodInvo1 -> methodInvo0 [kind=dependence]
argList2 -> z [kind=dependence]
methodInvo2 -> argList2 [kind=dependence]
methodInvo2 -> methodInvo1 [kind=dependence]
}
"#
);
}

/// An `object_creation_expression` takes part in the flow graph: the created object
/// depends on the constructor's `argument_list`, which depends on each argument.
///
/// For `String y = new String(z);` the expected graph therefore links `y` to `z`
/// through the `object_creation_expression` and `argument_list` nodes.
#[test]
fn obj_creation_expr() {
// [simplification]: every constructor argument is treated as if it always flows
// through into the value returned by the constructor.
assert_digraph!(
// language=java
"\
void method() {
String y = new String(z);
}
",
// Expected graph: an assignment edge from `y` to the created object, then
// dependence edges down to `z`.
// language=dot
r#"
strict digraph full {
y
z
objCreation [text="*",cstkind=object_creation_expression]
argList [text="*",cstkind=argument_list]

argList -> z [kind=dependence]
objCreation -> argList [kind=dependence]
y -> objCreation [kind=assignment]
}
"#
);
}

///////////////////////////////////////////////////////////////////////////
// Statements
///////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -1087,28 +1160,6 @@ strict digraph full {
);
}

/// `object_creation_expression` nodes are parsed but not analyzed.
#[test]
fn obj_creation_expr_unsupported() {
assert_digraph!(
// language=java
"\
void method() {
String var_A = new OuterClass().new InnerClass();
}
",
// language=dot
r#"
strict digraph full {
var_A
objCreation [text="new OuterClass().new InnerClass()",cstkind=object_creation_expression]

var_A -> objCreation [kind=assignment]
}
"#
);
}

/// A switch statement is only considered exhaustive if it contains a `default` case, regardless
/// of whether constant propagation could classify it exhaustive or not.
#[test]
Expand Down Expand Up @@ -1378,6 +1429,38 @@ strict digraph full {

var_A -> methodCall [kind=assignment]
}
"#
);
}
/// The "receiver taints the return value" simplification is applied recursively: it
/// also holds for a method call nested inside another call's argument list.
///
/// For `a = b.get(c.getBytes());` the expected graph makes the outer call depend on
/// its receiver `b` and the inner call depend on its receiver `c`, in addition to the
/// usual call -> argument list -> argument dependence edges.
#[test]
fn method_call_return_object_recursive() {
assert_digraph!(
// language=java
"\
void method() {
a = b.get(c.getBytes());
}
",
// Expected graph. Both calls have `cstkind=method_invocation` and `text="*"`, so
// the `col` attribute is presumably what tells the two nodes apart
// (NOTE(review): confirm how `assert_digraph!` interprets `col` and that it
// matches the snippet's real indentation).
// language=dot
r#"
strict digraph full {
a
b
c
methodCall_b [text="*",cstkind=method_invocation,col=9]
argList_b [text="*",cstkind=argument_list,col=14]
methodCall_c [text="*",cstkind=method_invocation,col=15]

// The simplification:
methodCall_b -> b [kind=dependence]
methodCall_c -> c [kind=dependence]
//////////

methodCall_b -> argList_b [kind=dependence]
argList_b -> methodCall_c [kind=dependence]
a -> methodCall_b [kind=assignment]
}
"#
);
}
Expand Down
Loading