From 110a4c81e3163f5bed1fb81b1052b128d0f872cb Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 31 Aug 2023 14:51:45 +0200 Subject: [PATCH 01/15] Java: Minor perf fix. --- java/ql/lib/semmle/code/java/frameworks/Thrift.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/java/ql/lib/semmle/code/java/frameworks/Thrift.qll b/java/ql/lib/semmle/code/java/frameworks/Thrift.qll index de30b1b338d3..4e07a2730dc2 100644 --- a/java/ql/lib/semmle/code/java/frameworks/Thrift.qll +++ b/java/ql/lib/semmle/code/java/frameworks/Thrift.qll @@ -8,6 +8,7 @@ import java * A file detected as generated by the Apache Thrift Compiler. */ class ThriftGeneratedFile extends GeneratedFile { + cached ThriftGeneratedFile() { exists(JavadocElement t | t.getFile() = this | exists(string msg | msg = t.getText() | msg.regexpMatch("(?i).*\\bAutogenerated by Thrift.*")) From a7b677ba40de9a134bc3ddf038054b8ef2d4061e Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 31 Aug 2023 14:52:25 +0200 Subject: [PATCH 02/15] Java: Bugfix for SuperAccess.isOwnInstanceAccess(). --- java/ql/lib/semmle/code/java/Expr.qll | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/java/ql/lib/semmle/code/java/Expr.qll b/java/ql/lib/semmle/code/java/Expr.qll index 312d7767ac2b..8a1b977f55aa 100644 --- a/java/ql/lib/semmle/code/java/Expr.qll +++ b/java/ql/lib/semmle/code/java/Expr.qll @@ -1670,13 +1670,25 @@ abstract class InstanceAccess extends Expr { /** Holds if this instance access is to an enclosing instance of type `t`. */ predicate isEnclosingInstanceAccess(RefType t) { t = this.getQualifier().getType().(RefType).getSourceDeclaration() and - t != this.getEnclosingCallable().getDeclaringType() + t != this.getEnclosingCallable().getDeclaringType() and + not this.isSuperInterfaceAccess() or - not exists(this.getQualifier()) and + (not exists(this.getQualifier()) or this.isSuperInterfaceAccess()) and exists(LambdaExpr lam | lam.asMethod() = this.getEnclosingCallable() | t = lam.getAnonymousClass().getEnclosingType() ) } + + // A default method on an interface, `I`, may be invoked using `I.super.m()`. + // This always refers to the implemented interfaces of `this`. This form of + // qualified `super` cannot be combined with accessing an enclosing instance. + // JLS 15.11.2. "Accessing Superclass Members using super" + // JLS 15.12. "Method Invocation Expressions" + // JLS 15.12.1. "Compile-Time Step 1: Determine Type to Search" + private predicate isSuperInterfaceAccess() { + this instanceof SuperAccess and + this.getQualifier().getType().(RefType).getSourceDeclaration() instanceof Interface + } } /** From 300425540a5d3ad865af513c94a1b46157e6344b Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 31 Aug 2023 14:54:25 +0200 Subject: [PATCH 03/15] Java: Minor improvement to TypeFlow for super accesses. --- .../ql/lib/semmle/code/java/dataflow/TypeFlow.qll | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll b/java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll index 6d1946a47f6c..c4b95645bc8b 100644 --- a/java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll +++ b/java/ql/lib/semmle/code/java/dataflow/TypeFlow.qll @@ -440,6 +440,18 @@ predicate arrayInstanceOfGuarded(ArrayAccess aa, RefType t) { ) } +/** + * Holds if `t` is the type of the `this` value corresponding to the the + * `SuperAccess`. As the `SuperAccess` expression has the type of the supertype, + * the type `t` is a stronger type bound. + */ +private predicate superAccess(SuperAccess sup, RefType t) { + sup.isEnclosingInstanceAccess(t) + or + sup.isOwnInstanceAccess() and + t = sup.getEnclosingCallable().getDeclaringType() +} + /** * Holds if `n` has type `t` and this information is discarded, such that `t` * might be a better type bound for nodes where `n` flows to. This might include @@ -452,7 +464,8 @@ private predicate typeFlowBaseCand(TypeFlowNode n, RefType t) { downcastSuccessor(n.asExpr(), srctype) or instanceOfGuarded(n.asExpr(), srctype) or arrayInstanceOfGuarded(n.asExpr(), srctype) or - n.asExpr().(FunctionalExpr).getConstructedType() = srctype + n.asExpr().(FunctionalExpr).getConstructedType() = srctype or + superAccess(n.asExpr(), srctype) | t = srctype.(BoundedType).getAnUltimateUpperBoundType() or From c8094d34a7702a2133a52fe14a8f06473f993c26 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Thu, 3 Aug 2023 10:00:33 +0200 Subject: [PATCH 04/15] Dataflow: Add type-based call-edge pruning. --- .../dataflow/internal/DataFlowPrivate.qll | 7 +- .../java/dataflow/internal/DataFlowUtil.qll | 33 ++ shared/dataflow/codeql/dataflow/DataFlow.qll | 5 + .../codeql/dataflow/internal/DataFlowImpl.qll | 352 ++++++++++++----- .../dataflow/internal/DataFlowImplCommon.qll | 361 ++++++++++++++++++ 5 files changed, 663 insertions(+), 95 deletions(-) diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll index 556baa60f96a..6223c9eae7a4 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowPrivate.qll @@ -326,13 +326,18 @@ string ppReprType(DataFlowType t) { else result = t.toString() } +pragma[nomagic] +private predicate compatibleTypes0(DataFlowType t1, DataFlowType t2) { + erasedHaveIntersection(t1, t2) +} + /** * Holds if `t1` and `t2` are compatible, that is, whether data can flow from * a node of type `t1` to a node of type `t2`. */ bindingset[t1, t2] pragma[inline_late] -predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { erasedHaveIntersection(t1, t2) } +predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { compatibleTypes0(t1, t2) } /** A node that performs a type cast. */ class CastNode extends ExprNode { diff --git a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll index 080724d8f0cd..4f48b066055e 100644 --- a/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll +++ b/java/ql/lib/semmle/code/java/dataflow/internal/DataFlowUtil.qll @@ -133,6 +133,39 @@ private module Cached { } } +/** + * Holds if the value of `node2` is given by `node1`. + */ +predicate localMustFlowStep(Node node1, Node node2) { + exists(Callable c | node1.(InstanceParameterNode).getCallable() = c | + exists(InstanceAccess ia | + ia = node2.asExpr() and ia.getEnclosingCallable() = c and ia.isOwnInstanceAccess() + ) + or + c = + node2.(ImplicitInstanceAccess).getInstanceAccess().(OwnInstanceAccess).getEnclosingCallable() + ) + or + exists(SsaImplicitInit init | + init.isParameterDefinition(node1.asParameter()) and init.getAUse() = node2.asExpr() + ) + or + exists(SsaExplicitUpdate upd | + upd.getDefiningExpr().(VariableAssign).getSource() = node1.asExpr() and + upd.getAUse() = node2.asExpr() + ) + or + node2.asExpr().(CastingExpr).getExpr() = node1.asExpr() + or + node2.asExpr().(AssignExpr).getSource() = node1.asExpr() + or + node1 = + unique(FlowSummaryNode n1 | + FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(), + node2.(FlowSummaryNode).getSummaryNode(), true) + ) +} + import Cached private predicate capturedVariableRead(Node n) { diff --git a/shared/dataflow/codeql/dataflow/DataFlow.qll b/shared/dataflow/codeql/dataflow/DataFlow.qll index ce2c518f4789..67948db0ee84 100644 --- a/shared/dataflow/codeql/dataflow/DataFlow.qll +++ b/shared/dataflow/codeql/dataflow/DataFlow.qll @@ -197,6 +197,11 @@ signature module InputSig { */ predicate allowParameterReturnInSelf(ParameterNode p); + /** + * Holds if the value of `node2` is given by `node1`. + */ + predicate localMustFlowStep(Node node1, Node node2); + class LambdaCallKind; /** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */ diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 23caaee0cce1..3e62803b09d1 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1289,36 +1289,11 @@ module MakeImpl { ap instanceof ApNil ) or - exists(NodeEx mid | - fwdFlow(mid, state, _, _, _, _, t, ap, apa) and - jumpStepEx(mid, node) and - cc = ccNone() and - summaryCtx = TParamNodeNone() and - argT instanceof TypOption::None and - argAp = apNone() - ) - or - exists(NodeEx mid | - fwdFlow(mid, state, _, _, _, _, _, ap, apa) and - additionalJumpStep(mid, node) and - cc = ccNone() and - summaryCtx = TParamNodeNone() and - argT instanceof TypOption::None and - argAp = apNone() and - t = getNodeTyp(node) and - ap instanceof ApNil - ) - or - exists(NodeEx mid, FlowState state0 | - fwdFlow(mid, state0, _, _, _, _, _, ap, apa) and - additionalJumpStateStep(mid, state0, node, state) and - cc = ccNone() and - summaryCtx = TParamNodeNone() and - argT instanceof TypOption::None and - argAp = apNone() and - t = getNodeTyp(node) and - ap instanceof ApNil - ) + fwdFlowJump(node, state, t, ap, apa) and + cc = ccNone() and + summaryCtx = TParamNodeNone() and + argT instanceof TypOption::None and + argAp = apNone() or // store exists(Content c, Typ t0, Ap ap0 | @@ -1346,7 +1321,7 @@ module MakeImpl { ) or // flow out of a callable - fwdFlowOut(node, state, cc, summaryCtx, argT, argAp, t, ap, apa) + fwdFlowOut(_, _, node, state, cc, summaryCtx, argT, argAp, t, ap, apa) or // flow through a callable exists( @@ -1360,6 +1335,27 @@ module MakeImpl { ) } + private predicate fwdFlowJump(NodeEx node, FlowState state, Typ t, Ap ap, ApApprox apa) { + exists(NodeEx mid | + fwdFlow(mid, state, _, _, _, _, t, ap, apa) and + jumpStepEx(mid, node) + ) + or + exists(NodeEx mid | + fwdFlow(mid, state, _, _, _, _, _, ap, apa) and + additionalJumpStep(mid, node) and + t = getNodeTyp(node) and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0 | + fwdFlow(mid, state0, _, _, _, _, _, ap, apa) and + additionalJumpStateStep(mid, state0, node, state) and + t = getNodeTyp(node) and + ap instanceof ApNil + ) + } + pragma[nomagic] private predicate fwdFlowStore( NodeEx node1, Typ t1, Ap ap1, Content c, Typ t2, NodeEx node2, FlowState state, Cc cc, @@ -1455,11 +1451,12 @@ module MakeImpl { } pragma[nomagic] - private predicate fwdFlowIn( - DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, CcCall innercc, - ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + private predicate fwdFlowInCand( + DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, + CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, + ApApprox apa ) { - exists(ArgNodeEx arg, boolean allowsFieldFlow, DataFlowCallable inner | + exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and ( inner = viableImplCallContextReducedInlineLate(call, arg, outercc) @@ -1473,6 +1470,19 @@ module MakeImpl { ) } + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, CcCall innercc, + ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + ) { + exists(DataFlowCallable inner, boolean cc | + fwdFlowInCand(call, inner, p, state, outercc, innercc, summaryCtx, argT, argAp, t, ap, + apa) and + FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and + if outercc instanceof CcCall then cc = true else cc = false + ) + } + bindingset[ctx, result] pragma[inline_late] private DataFlowCallable viableImplCallContextReducedReverseInlineLate( @@ -1501,16 +1511,12 @@ module MakeImpl { flowOutOfCallApa(call, c, ret, _, out, allowsFieldFlow, apa) } - // inline to reduce number of iterations - pragma[inline] - private predicate fwdFlowOut( - NodeEx out, FlowState state, CcNoCall outercc, ParamNodeOption summaryCtx, TypOption argT, - ApOption argAp, Typ t, Ap ap, ApApprox apa + pragma[nomagic] + private predicate fwdFlowOutCand( + DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, + ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { - exists( - DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, - DataFlowCallable inner - | + exists(RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc | fwdFlow(ret, state, innercc, summaryCtx, argT, argAp, t, ap, apa) and inner = ret.getEnclosingCallable() and ( @@ -1526,6 +1532,63 @@ module MakeImpl { ) } + private predicate fwdFlowOut( + DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, + ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + ) { + fwdFlowOutCand(call, inner, out, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and + FwdTypeFlow::typeFlowValidEdgeOut(call, inner) + } + + private module FwdTypeFlowInput implements TypeFlowInput { + predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c) { + flowIntoCallApa(call, c, _, _, _, _) + } + + predicate relevantCallEdgeOut(DataFlowCall call, DataFlowCallable c) { + flowOutOfCallApa(call, c, _, _, _, _, _) + } + + pragma[nomagic] + predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { + exists(ParamNodeEx p, Cc outercc | + fwdFlowIn(call, p, _, outercc, _, _, _, _, _, _, _) and + c = p.getEnclosingCallable() and + if outercc instanceof CcCall then cc = true else cc = false + ) + } + + pragma[nomagic] + predicate dataFlowTakenCallEdgeOut(DataFlowCall call, DataFlowCallable c) { + fwdFlowOut(call, c, _, _, _, _, _, _, _, _, _) + } + + predicate dataFlowNonCallEntry(DataFlowCallable c, boolean cc) { + exists(NodeEx node, FlowState state | + sourceNode(node, state) and + (if hasSourceCallCtx() then cc = true else cc = false) and + PrevStage::revFlow(node, state, getApprox(any(ApNil nil))) and + c = node.getEnclosingCallable() + ) + or + exists(NodeEx node | + cc = false and + fwdFlowJump(node, _, _, _, _) and + c = node.getEnclosingCallable() + ) + } + } + + private module FwdTypeFlow = TypeFlow; + + private predicate flowIntoCallApaValid( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, ApApprox apa + ) { + flowIntoCallApa(call, c, arg, p, allowsFieldFlow, apa) and + FwdTypeFlow::typeFlowValidEdgeIn(call, c, _) + } + pragma[nomagic] private predicate fwdFlowRetFromArg( RetNodeEx ret, FlowState state, CcCall ccc, ParamNodeEx summaryCtx, Typ argT, Ap argAp, @@ -1625,7 +1688,7 @@ module MakeImpl { exists(ApApprox argApa, Typ argT | returnFlowsThrough(_, _, _, _, pragma[only_bind_into](p), pragma[only_bind_into](argT), pragma[only_bind_into](argAp), ap) and - flowIntoCallApa(call, _, pragma[only_bind_into](arg), p, allowsFieldFlow, argApa) and + flowIntoCallApaValid(call, _, pragma[only_bind_into](arg), p, allowsFieldFlow, argApa) and fwdFlow(arg, _, _, _, _, _, pragma[only_bind_into](argT), pragma[only_bind_into](argAp), argApa) and if allowsFieldFlow = false then argAp instanceof ApNil else any() @@ -1633,9 +1696,11 @@ module MakeImpl { } pragma[nomagic] - private predicate flowIntoCallAp(DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Ap ap) { + private predicate flowIntoCallAp( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, Ap ap + ) { exists(ApApprox apa, boolean allowsFieldFlow | - flowIntoCallApa(call, _, arg, p, allowsFieldFlow, apa) and + flowIntoCallApaValid(call, c, arg, p, allowsFieldFlow, apa) and fwdFlow(arg, _, _, _, _, _, _, ap, apa) and if allowsFieldFlow = false then ap instanceof ApNil else any() ) @@ -1643,13 +1708,17 @@ module MakeImpl { pragma[nomagic] private predicate flowOutOfCallAp( - DataFlowCall call, RetNodeEx ret, ReturnPosition pos, NodeEx out, Ap ap + DataFlowCall call, DataFlowCallable c, RetNodeEx ret, ReturnPosition pos, NodeEx out, + Ap ap ) { exists(ApApprox apa, boolean allowsFieldFlow | - flowOutOfCallApa(call, _, ret, _, out, allowsFieldFlow, apa) and + flowOutOfCallApa(call, c, ret, _, out, allowsFieldFlow, apa) and fwdFlow(ret, _, _, _, _, _, _, ap, apa) and pos = ret.getReturnPosition() and if allowsFieldFlow = false then ap instanceof ApNil else any() + | + FwdTypeFlow::typeFlowValidEdgeIn(call, c, _) or + FwdTypeFlow::typeFlowValidEdgeOut(call, c) ) } @@ -1694,28 +1763,9 @@ module MakeImpl { ap instanceof ApNil ) or - exists(NodeEx mid | - jumpStepEx(node, mid) and - revFlow(mid, state, _, _, ap) and - returnCtx = TReturnCtxNone() and - returnAp = apNone() - ) - or - exists(NodeEx mid | - additionalJumpStep(node, mid) and - revFlow(pragma[only_bind_into](mid), state, _, _, ap) and - returnCtx = TReturnCtxNone() and - returnAp = apNone() and - ap instanceof ApNil - ) - or - exists(NodeEx mid, FlowState state0 | - additionalJumpStateStep(node, state, mid, state0) and - revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, ap) and - returnCtx = TReturnCtxNone() and - returnAp = apNone() and - ap instanceof ApNil - ) + revFlowJump(node, state, ap) and + returnCtx = TReturnCtxNone() and + returnAp = apNone() or // store exists(Ap ap0, Content c | @@ -1730,11 +1780,9 @@ module MakeImpl { ) or // flow into a callable - exists(ParamNodeEx p | - revFlow(p, state, TReturnCtxNone(), returnAp, ap) and - flowIntoCallAp(_, node, p, ap) and - returnCtx = TReturnCtxNone() - ) + revFlowIn(_, _, node, state, ap) and + returnCtx = TReturnCtxNone() and + returnAp = apNone() or // flow through a callable exists(DataFlowCall call, ParamNodeEx p, Ap innerReturnAp | @@ -1755,6 +1803,25 @@ module MakeImpl { ) } + private predicate revFlowJump(NodeEx node, FlowState state, Ap ap) { + exists(NodeEx mid | + jumpStepEx(node, mid) and + revFlow(mid, state, _, _, ap) + ) + or + exists(NodeEx mid | + additionalJumpStep(node, mid) and + revFlow(pragma[only_bind_into](mid), state, _, _, ap) and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0 | + additionalJumpStateStep(node, state, mid, state0) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, ap) and + ap instanceof ApNil + ) + } + pragma[nomagic] private predicate revFlowStore( Ap ap0, Content c, Ap ap, Typ t, NodeEx node, FlowState state, NodeEx mid, @@ -1777,14 +1844,81 @@ module MakeImpl { ) } + private module RevTypeFlowInput implements TypeFlowInput { + predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c) { + flowOutOfCallAp(call, c, _, _, _, _) + } + + predicate relevantCallEdgeOut(DataFlowCall call, DataFlowCallable c) { + flowIntoCallAp(call, c, _, _, _) + } + + pragma[nomagic] + predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { + exists(RetNodeEx ret, ReturnCtx returnCtx | + revFlowOut(call, ret, _, _, returnCtx, _, _) and + c = ret.getEnclosingCallable() and + if returnCtx instanceof TReturnCtxNone then cc = false else cc = true + ) + } + + pragma[nomagic] + predicate dataFlowTakenCallEdgeOut(DataFlowCall call, DataFlowCallable c) { + revFlowIn(call, c, _, _, _) + } + + predicate dataFlowNonCallEntry(DataFlowCallable c, boolean cc) { + exists(NodeEx node, FlowState state, ApNil nil | + fwdFlow(node, state, _, _, _, _, _, nil, _) and + sinkNode(node, state) and + (if hasSinkCallCtx() then cc = true else cc = false) and + c = node.getEnclosingCallable() + ) + or + exists(NodeEx node | + cc = false and + revFlowJump(node, _, _) and + c = node.getEnclosingCallable() + ) + } + } + + private module RevTypeFlow = TypeFlow; + + private predicate flowIntoCallApValid( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, Ap ap + ) { + flowIntoCallAp(call, c, arg, p, ap) and + RevTypeFlow::typeFlowValidEdgeOut(call, c) + } + + private predicate flowOutOfCallApValid( + DataFlowCall call, RetNodeEx ret, ReturnPosition pos, NodeEx out, Ap ap, boolean cc + ) { + exists(DataFlowCallable c | + flowOutOfCallAp(call, c, ret, pos, out, ap) and + RevTypeFlow::typeFlowValidEdgeIn(call, c, cc) + ) + } + + private predicate revFlowIn( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, FlowState state, Ap ap + ) { + exists(ParamNodeEx p | + revFlow(p, state, TReturnCtxNone(), _, ap) and + flowIntoCallApValid(call, c, arg, p, ap) + ) + } + pragma[nomagic] private predicate revFlowOut( DataFlowCall call, RetNodeEx ret, ReturnPosition pos, FlowState state, ReturnCtx returnCtx, ApOption returnAp, Ap ap ) { - exists(NodeEx out | + exists(NodeEx out, boolean cc | revFlow(out, state, returnCtx, returnAp, ap) and - flowOutOfCallAp(call, ret, pos, out, ap) + flowOutOfCallApValid(call, ret, pos, out, ap, cc) and + if returnCtx instanceof TReturnCtxNone then cc = false else cc = true ) } @@ -1921,7 +2055,8 @@ module MakeImpl { } additional predicate stats( - boolean fwd, int nodes, int fields, int conscand, int states, int tuples + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, int calledges, + int tfnodes, int tftuples ) { fwd = true and nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, _, _, _, _)) and @@ -1932,7 +2067,13 @@ module MakeImpl { count(NodeEx n, FlowState state, Cc cc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap | fwdFlow(n, state, cc, summaryCtx, argT, argAp, t, ap, _) - ) + ) and + calledges = + count(DataFlowCall call, DataFlowCallable c | + FwdTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or + FwdTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) + ) and + FwdTypeFlow::typeFlowStats(tfnodes, tftuples) or fwd = false and nodes = count(NodeEx node | revFlow(node, _, _, _, _)) and @@ -1942,7 +2083,13 @@ module MakeImpl { tuples = count(NodeEx n, FlowState state, ReturnCtx returnCtx, ApOption retAp, Ap ap | revFlow(n, state, returnCtx, retAp, ap) - ) + ) and + calledges = + count(DataFlowCall call, DataFlowCallable c | + RevTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or + RevTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) + ) and + RevTypeFlow::typeFlowStats(tfnodes, tftuples) } /* End: Stage logic. */ } @@ -3972,51 +4119,68 @@ module MakeImpl { * Calculates per-stage metrics for data flow. */ predicate stageStats( - int n, string stage, int nodes, int fields, int conscand, int states, int tuples + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + int calledges, int tfnodes, int tftuples ) { stage = "1 Fwd" and n = 10 and - Stage1::stats(true, nodes, fields, conscand, states, tuples) + Stage1::stats(true, nodes, fields, conscand, states, tuples) and + calledges = -1 and + tfnodes = -1 and + tftuples = -1 or stage = "1 Rev" and n = 15 and - Stage1::stats(false, nodes, fields, conscand, states, tuples) + Stage1::stats(false, nodes, fields, conscand, states, tuples) and + calledges = -1 and + tfnodes = -1 and + tftuples = -1 or stage = "2 Fwd" and n = 20 and - Stage2::stats(true, nodes, fields, conscand, states, tuples) + Stage2::stats(true, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "2 Rev" and n = 25 and - Stage2::stats(false, nodes, fields, conscand, states, tuples) + Stage2::stats(false, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "3 Fwd" and n = 30 and - Stage3::stats(true, nodes, fields, conscand, states, tuples) + Stage3::stats(true, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "3 Rev" and n = 35 and - Stage3::stats(false, nodes, fields, conscand, states, tuples) + Stage3::stats(false, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "4 Fwd" and n = 40 and - Stage4::stats(true, nodes, fields, conscand, states, tuples) + Stage4::stats(true, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "4 Rev" and n = 45 and - Stage4::stats(false, nodes, fields, conscand, states, tuples) + Stage4::stats(false, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "5 Fwd" and n = 50 and - Stage5::stats(true, nodes, fields, conscand, states, tuples) + Stage5::stats(true, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or stage = "5 Rev" and n = 55 and - Stage5::stats(false, nodes, fields, conscand, states, tuples) + Stage5::stats(false, nodes, fields, conscand, states, tuples, calledges, tfnodes, tftuples) or - stage = "6 Fwd" and n = 60 and finalStats(true, nodes, fields, conscand, states, tuples) + stage = "6 Fwd" and + n = 60 and + finalStats(true, nodes, fields, conscand, states, tuples) and + calledges = -1 and + tfnodes = -1 and + tftuples = -1 or - stage = "6 Rev" and n = 65 and finalStats(false, nodes, fields, conscand, states, tuples) + stage = "6 Rev" and + n = 65 and + finalStats(false, nodes, fields, conscand, states, tuples) and + calledges = -1 and + tfnodes = -1 and + tftuples = -1 } module FlowExploration { diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 6fb53e04331a..c1f7965c20d7 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -890,6 +890,9 @@ module MakeImplCommon { cached predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) } + cached + predicate paramMustFlow(ParamNode p, ArgNode arg) { localMustFlowStep+(p, arg) } + cached newtype TCallContext = TAnyCallContext() or @@ -958,6 +961,364 @@ module MakeImplCommon { TApproxAccessPathFrontSome(ApproxAccessPathFront apf) } + bindingset[t1, t2] + pragma[inline_late] + private predicate typeStrongerThan0(DataFlowType t1, DataFlowType t2) { typeStrongerThan(t1, t2) } + + private predicate callEdge(DataFlowCall call, DataFlowCallable c, ArgNode arg, ParamNode p) { + viableParamArg(call, p, arg) and + c = getNodeEnclosingCallable(p) + } + + signature module TypeFlowInput { + /** Holds if the edge is possibly needed in the direction `call` to `c`. */ + predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c); + + /** Holds if the edge is possibly needed in the direction `c` to `call`. */ + predicate relevantCallEdgeOut(DataFlowCall call, DataFlowCallable c); + + /** + * Holds if the edge is followed in data flow in the direction `call` to `c` + * and the call context `cc`. + */ + predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc); + + /** + * Holds if the edge is followed in data flow in the direction `c` to `call`. + */ + predicate dataFlowTakenCallEdgeOut(DataFlowCall call, DataFlowCallable c); + + /** + * Holds if data flow enters `c` with call context `cc` without using a call + * edge. + */ + predicate dataFlowNonCallEntry(DataFlowCallable c, boolean cc); + } + + /** + * Given a call graph for a set of flow paths, this module calculates the type + * flow between parameter and argument nodes in the cases where it is possible + * for a type to first be weakened and then strengthened again. When the + * stronger types at the end-points of such a type flow path are incompatible, + * the call relevant call edges can be excluded as impossible. + * + * The predicates `relevantCallEdgeIn` and `relevantCallEdgeOut` give the + * graph to be explored prior to the recursion, and the other three predicates + * are calculated in mutual recursion with the output of this module, which is + * given in `typeFlowValidEdgeIn` and `typeFlowValidEdgeOut`. + */ + module TypeFlow { + private predicate relevantCallEdge( + DataFlowCall call, DataFlowCallable c, ArgNode arg, ParamNode p + ) { + callEdge(call, c, arg, p) and + ( + Input::relevantCallEdgeIn(call, c) or + Input::relevantCallEdgeOut(call, c) + ) + } + + /** + * Holds if a sequence calls may propagates the value of `p` to some + * argument-to-parameter call edge that strengthens the static type. + */ + pragma[nomagic] + private predicate trackedParamTypeCand(ParamNode p) { + exists(ArgNode arg | + trackedArgTypeCand(arg) and + paramMustFlow(p, arg) + ) + } + + /** + * Holds if a sequence calls may propagates the value of `arg` to some + * argument-to-parameter call edge that strengthens the static type. + */ + pragma[nomagic] + private predicate trackedArgTypeCand(ArgNode arg) { + exists(ParamNode p, DataFlowType at, DataFlowType pt | + at = getNodeType(arg) and + pt = getNodeType(p) and + relevantCallEdge(_, _, arg, p) and + typeStrongerThan0(pt, at) + ) + or + exists(ParamNode p, DataFlowType at, DataFlowType pt | + at = getNodeType(arg) and + pt = getNodeType(p) and + paramMustFlow(p, arg) and + relevantCallEdge(_, _, arg, _) and + typeStrongerThan0(at, pt) + ) + or + exists(ParamNode p | + trackedParamTypeCand(p) and + relevantCallEdge(_, _, arg, p) + ) + } + + /** + * Holds if `p` is part of a value-propagating call path where the + * end-points have stronger types than the intermediate parameter and + * argument nodes. + */ + private predicate trackedParamType(ParamNode p) { + exists( + DataFlowCall call1, DataFlowCallable c1, ArgNode argOut, DataFlowCall call2, + DataFlowCallable c2, ArgNode argIn + | + trackedParamTypeCand(p) and + callEdge(call1, c1, argOut, _) and + Input::relevantCallEdgeOut(call1, c1) and + trackedArgTypeCand(argOut) and + paramMustFlow(p, argOut) and + callEdge(call2, c2, argIn, _) and + Input::relevantCallEdgeIn(call2, c2) and + trackedArgTypeCand(argIn) and + paramMustFlow(p, argIn) + ) + or + exists(ArgNode arg, DataFlowType at, DataFlowType pt | + trackedParamTypeCand(p) and + at = getNodeType(arg) and + pt = getNodeType(p) and + relevantCallEdge(_, _, arg, p) and + typeStrongerThan0(at, pt) + ) + or + exists(ArgNode arg | + trackedArgType(arg) and + relevantCallEdge(_, _, arg, p) and + trackedParamTypeCand(p) + ) + } + + /** + * Holds if `arg` is part of a value-propagating call path where the + * end-points have stronger types than the intermediate parameter and + * argument nodes. + */ + private predicate trackedArgType(ArgNode arg) { + exists(ParamNode p | + trackedParamType(p) and + paramMustFlow(p, arg) and + trackedArgTypeCand(arg) + ) + } + + pragma[nomagic] + private predicate returnCallDeterminesParam(DataFlowCall call, ParamNode p) { + exists(ArgNode arg | + trackedArgType(arg) and + arg.argumentOf(call, _) and + paramMustFlow(p, arg) + ) + } + + private predicate returnCallLeavesParamUndetermined(DataFlowCall call, ParamNode p) { + trackedParamType(p) and + call.getEnclosingCallable() = getNodeEnclosingCallable(p) and + not returnCallDeterminesParam(call, p) + } + + pragma[nomagic] + private predicate trackedParamWithType(ParamNode p, DataFlowType t, DataFlowCallable c) { + trackedParamType(p) and + c = getNodeEnclosingCallable(p) and + nodeDataFlowType(p, t) + } + + pragma[nomagic] + private predicate dataFlowTakenCallEdgeIn( + DataFlowCall call, DataFlowCallable c, ArgNode arg, ParamNode p, boolean cc + ) { + Input::dataFlowTakenCallEdgeIn(call, c, cc) and + callEdge(call, c, arg, p) and + trackedParamType(p) + } + + pragma[nomagic] + private predicate dataFlowTakenCallEdgeOut( + DataFlowCall call, DataFlowCallable c, ArgNode arg, ParamNode p + ) { + Input::dataFlowTakenCallEdgeOut(call, c) and + callEdge(call, c, arg, p) and + trackedArgType(arg) and + paramMustFlow(_, arg) + } + + /** + * Holds if `t` is a possible type for an argument reaching the tracked + * parameter `p` through an in-going edge in the current data flow stage. + */ + pragma[nomagic] + private predicate typeFlowParamTypeCand(ParamNode p, DataFlowType t) { + exists(ArgNode arg, boolean outercc | + dataFlowTakenCallEdgeIn(_, _, arg, p, outercc) and + if trackedArgType(arg) then typeFlowArgType(arg, t, outercc) else nodeDataFlowType(arg, t) + ) + } + + /** + * Holds if `t` is a possible type for the tracked parameter `p` in the call + * context `cc` and that the current data flow stage has reached this + * context. + */ + private predicate typeFlowParamType(ParamNode p, DataFlowType t, boolean cc) { + exists(DataFlowCallable c | + Input::dataFlowNonCallEntry(c, cc) and + trackedParamWithType(p, t, c) + ) + or + exists(DataFlowType t1, DataFlowType t2 | + cc = true and + typeFlowParamTypeCand(p, t1) and + nodeDataFlowType(p, t2) and + if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + ) + or + exists(ArgNode arg, DataFlowType t1, DataFlowType t2 | + cc = false and + typeFlowArgTypeFromReturn(arg, t1) and + paramMustFlow(p, arg) and + nodeDataFlowType(p, t2) and + if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + ) + or + exists(DataFlowCall call | + cc = false and + Input::dataFlowTakenCallEdgeOut(call, _) and + returnCallLeavesParamUndetermined(call, p) and + nodeDataFlowType(p, t) + ) + } + + /** + * Holds if `t` is a possible type for the tracked argument `arg` and that + * the current data flow stage has reached the call of `arg` from one of its + * call targets. + */ + private predicate typeFlowArgTypeFromReturn(ArgNode arg, DataFlowType t) { + exists(ParamNode p, DataFlowType t1, DataFlowType t2 | + dataFlowTakenCallEdgeOut(_, _, arg, p) and + (if trackedParamType(p) then typeFlowParamType(p, t1, false) else nodeDataFlowType(p, t1)) and + nodeDataFlowType(arg, t2) and + if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + ) + } + + /** + * Holds if `t` is a possible type for the tracked argument `arg` in the call + * context `cc` and that the current data flow stage has reached this + * context. + */ + private predicate typeFlowArgType(ArgNode arg, DataFlowType t, boolean cc) { + trackedArgType(arg) and + ( + exists(ParamNode p, DataFlowType t1, DataFlowType t2 | + paramMustFlow(p, arg) and + typeFlowParamType(p, t1, cc) and + nodeDataFlowType(arg, t2) and + if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + ) + or + cc = [true, false] and + not paramMustFlow(_, arg) and + nodeDataFlowType(arg, t) + ) + } + + predicate typeFlowStats(int nodes, int tuples) { + nodes = + count(Node n | + typeFlowParamType(n, _, _) or typeFlowArgTypeFromReturn(n, _) or typeFlowArgType(n, _, _) + ) and + tuples = + count(Node n, DataFlowType t, boolean cc | + typeFlowParamType(n, t, cc) + or + typeFlowArgTypeFromReturn(n, t) and cc = false + or + typeFlowArgType(n, t, cc) + ) + } + + /** + * Holds if the `arg`-to-`p` edge should be considered for validation of the + * corresponding call edge in the in-going direction. + */ + private predicate relevantArgParamIn(ArgNode arg, ParamNode p, DataFlowType pt) { + exists(DataFlowCall call, DataFlowCallable c | + Input::relevantCallEdgeIn(call, c) and + callEdge(call, c, arg, p) and + paramMustFlow(_, arg) and + trackedArgType(arg) and + nodeDataFlowType(p, pt) + ) + } + + /** + * Holds if there is a possible type for `arg` in the call context `cc` that + * is consistent with the static type of `p`. + */ + private predicate validArgParamIn(ArgNode arg, ParamNode p, boolean cc) { + exists(DataFlowType t1, DataFlowType t2 | + typeFlowArgType(arg, t1, cc) and + relevantArgParamIn(arg, p, t2) and + compatibleTypes(t1, t2) + ) + } + + /** + * Holds if the edge `call`-to-`c` is valid in the in-going direction in the + * call context `cc`. + */ + predicate typeFlowValidEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { + Input::relevantCallEdgeIn(call, c) and + cc = [true, false] and + forall(ArgNode arg, ParamNode p | + callEdge(call, c, arg, p) and trackedArgType(arg) and paramMustFlow(_, arg) + | + validArgParamIn(arg, p, cc) + ) + } + + /** + * Holds if the `arg`-to-`p` edge should be considered for validation of the + * corresponding call edge in the out-going direction. + */ + private predicate relevantArgParamOut(ArgNode arg, ParamNode p, DataFlowType argt) { + exists(DataFlowCall call, DataFlowCallable c | + Input::relevantCallEdgeOut(call, c) and + callEdge(call, c, arg, p) and + trackedParamType(p) and + nodeDataFlowType(arg, argt) + ) + } + + /** + * Holds if there is a possible type for `p` in the call context `false` + * that is consistent with the static type of `arg`. + */ + private predicate validArgParamOut(ArgNode arg, ParamNode p) { + exists(DataFlowType t1, DataFlowType t2 | + typeFlowParamType(p, t1, false) and + relevantArgParamOut(arg, p, t2) and + compatibleTypes(t1, t2) + ) + } + + /** + * Holds if the edge `call`-to-`c` is valid in the out-going direction. + */ + predicate typeFlowValidEdgeOut(DataFlowCall call, DataFlowCallable c) { + Input::relevantCallEdgeOut(call, c) and + forall(ArgNode arg, ParamNode p | callEdge(call, c, arg, p) and trackedParamType(p) | + validArgParamOut(arg, p) + ) + } + } + /** * Holds if the call context `call` either improves virtual dispatch in * `callable` or if it allows us to prune unreachable nodes in `callable`. From e677c1ffe6a0b0f6e575b45270af329454db2cde Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 4 Sep 2023 14:04:32 +0200 Subject: [PATCH 05/15] Java: Add qltest --- .../dataflow/typeflow-dispatch/A.java | 60 +++++++++++++++++++ .../dataflow/typeflow-dispatch/test.expected | 0 .../dataflow/typeflow-dispatch/test.ql | 2 + 3 files changed, 62 insertions(+) create mode 100644 java/ql/test/library-tests/dataflow/typeflow-dispatch/A.java create mode 100644 java/ql/test/library-tests/dataflow/typeflow-dispatch/test.expected create mode 100644 java/ql/test/library-tests/dataflow/typeflow-dispatch/test.ql diff --git a/java/ql/test/library-tests/dataflow/typeflow-dispatch/A.java b/java/ql/test/library-tests/dataflow/typeflow-dispatch/A.java new file mode 100644 index 000000000000..f40f5a69d4cb --- /dev/null +++ b/java/ql/test/library-tests/dataflow/typeflow-dispatch/A.java @@ -0,0 +1,60 @@ +import java.util.*; +import java.util.function.*; + +public class A { + static String source(String tag) { return null; } + + static void sink(Object o) { } + + interface MyConsumer { + void run(Object o); + } + + void apply(MyConsumer f, Object x) { + f.run(x); + } + + void apply_wrap(MyConsumer f, Object x) { + apply(f, x); + } + + void testLambdaDispatch1() { + apply_wrap(x -> { sink(x); }, source("A")); // $ hasValueFlow=A + apply_wrap(x -> { sink(x); }, null); // no flow + apply_wrap(x -> { }, source("B")); + apply_wrap(x -> { }, null); + } + + void forEach_wrap(List l, Consumer f) { + l.forEach(f); + } + + void testLambdaDispatch2() { + List tainted = new ArrayList<>(); + tainted.add(source("L")); + List safe = new ArrayList<>(); + forEach_wrap(safe, x -> { sink(x); }); // no flow + forEach_wrap(tainted, x -> { sink(x); }); // $ hasValueFlow=L + } + + static class TaintedClass { + public String toString() { return source("TaintedClass"); } + } + + static class SafeClass { + public String toString() { return "safe"; } + } + + String convertToString(Object o) { + return o.toString(); + } + + String convertToString_wrap(Object o) { + return convertToString(o); + } + + void testToString1() { + String unused = convertToString_wrap(new TaintedClass()); + sink(convertToString_wrap(new SafeClass())); // no flow + } +} diff --git a/java/ql/test/library-tests/dataflow/typeflow-dispatch/test.expected b/java/ql/test/library-tests/dataflow/typeflow-dispatch/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/java/ql/test/library-tests/dataflow/typeflow-dispatch/test.ql b/java/ql/test/library-tests/dataflow/typeflow-dispatch/test.ql new file mode 100644 index 000000000000..50e3f8d2f7de --- /dev/null +++ b/java/ql/test/library-tests/dataflow/typeflow-dispatch/test.ql @@ -0,0 +1,2 @@ +import TestUtilities.InlineFlowTest +import DefaultFlowTest From 1750d00fbec40ccb0ca2528901abe5e7cad22dc9 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 4 Sep 2023 16:12:48 +0200 Subject: [PATCH 06/15] C#: Add localMustFlowStep --- .../dataflow/internal/DataFlowPrivate.qll | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index dd8438900bde..c1c851d18fd3 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -533,8 +533,36 @@ module LocalFlow { ) and not exists(getALastEvalNode(result)) } + + /** + * Holds if the value of `node2` is given by `node1`. + */ + predicate localMustFlowStep(Node node1, Node node2) { + exists(Callable c, Expr e | + node1.(InstanceParameterNode).getCallable() = c and + node2.asExpr() = e and + (e instanceof ThisAccess or e instanceof BaseAccess) and + c = e.getEnclosingCallable() + ) + or + hasNodePath(any(LocalExprStepConfiguration x), node1, node2) and + (node2 instanceof SsaDefinitionExtNode or node2.asExpr() instanceof Cast) + or + exists(SsaImpl::Definition def | + def = getSsaDefinitionExt(node1) and + exists(SsaImpl::getAReadAtNode(def, node2.(ExprNode).getControlFlowNode())) + ) + or + node1 = + unique(FlowSummaryNode n1 | + FlowSummaryImpl::Private::Steps::summaryLocalStep(n1.getSummaryNode(), + node2.(FlowSummaryNode).getSummaryNode(), true) + ) + } } +predicate localMustFlowStep = LocalFlow::localMustFlowStep/2; + /** * This is the local flow predicate that is used as a building block in global * data flow. It excludes SSA flow through instance fields, as flow through fields From f5a4b792bd792818bfe388e1d948126c753cc055 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 4 Sep 2023 16:17:10 +0200 Subject: [PATCH 07/15] C++/Go/Python/Ruby/Swift: Add dummy localMustFlowStep. --- .../lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll | 2 ++ .../semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll | 2 ++ go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll | 2 ++ .../lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll | 2 ++ ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll | 2 ++ swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll | 2 ++ 6 files changed, 12 insertions(+) diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll index 00eca92b3e44..a62e080dfb05 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/DataFlowPrivate.qll @@ -208,6 +208,8 @@ predicate expectsContent(Node n, ContentSet c) { none() } predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** Gets the type of `n` used for type pruning. */ Type getNodeType(Node n) { suppressUnusedNode(n) and diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index 7e4b87e38538..066e177fea07 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -804,6 +804,8 @@ predicate expectsContent(Node n, ContentSet c) { none() } predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** Gets the type of `n` used for type pruning. */ DataFlowType getNodeType(Node n) { suppressUnusedNode(n) and diff --git a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll index 4c718bbe279f..525106603abc 100644 --- a/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll +++ b/go/ql/lib/semmle/go/dataflow/internal/DataFlowPrivate.qll @@ -205,6 +205,8 @@ predicate expectsContent(Node n, ContentSet c) { predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** Gets the type of `n` used for type pruning. */ DataFlowType getNodeType(Node n) { result = TTodoDataFlowType() and exists(n) } diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll index e98d4d2de62b..45e2a541edad 100644 --- a/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll +++ b/python/ql/lib/semmle/python/dataflow/new/internal/DataFlowPrivate.qll @@ -539,6 +539,8 @@ predicate compatibleTypes(DataFlowType t1, DataFlowType t2) { any() } predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** * Gets the type of `node`. */ diff --git a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll index f53fb80fcea7..8eb3cfdd2ecb 100644 --- a/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll +++ b/ruby/ql/lib/codeql/ruby/dataflow/internal/DataFlowPrivate.qll @@ -1697,6 +1697,8 @@ private predicate mustHaveLambdaType(CfgNodes::ExprCfgNode e, Callable c) { ) } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** Gets the type of `n` used for type pruning. */ DataFlowType getNodeType(Node n) { result = TLambdaDataFlowType(n.(LambdaSelfReferenceNode).getCallable()) diff --git a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll index 3255afb19db5..3e1ccc15a209 100644 --- a/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll +++ b/swift/ql/lib/codeql/swift/dataflow/internal/DataFlowPrivate.qll @@ -1038,6 +1038,8 @@ class DataFlowType extends TDataFlowType { predicate typeStrongerThan(DataFlowType t1, DataFlowType t2) { none() } +predicate localMustFlowStep(Node node1, Node node2) { none() } + /** Gets the type of `n` used for type pruning. */ DataFlowType getNodeType(Node n) { any() // return the singleton DataFlowType until we support type pruning for Swift From 47f68504a82af70c9c3f2bb0221953cc6ef103eb Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Fri, 8 Sep 2023 13:40:31 +0200 Subject: [PATCH 08/15] Dataflow: Postpone typeflow calledge pruning until stage 3. --- .../codeql/dataflow/internal/DataFlowImpl.qll | 8 +++ .../dataflow/internal/DataFlowImplCommon.qll | 61 +++++++++++-------- 2 files changed, 45 insertions(+), 24 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 3e62803b09d1..335fadc6817e 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1185,6 +1185,8 @@ module MakeImpl { bindingset[typ, contentType] predicate typecheckStore(Typ typ, DataFlowType contentType); + + default predicate enableTypeFlow() { any() } } module Stage implements StageSig { @@ -1541,6 +1543,8 @@ module MakeImpl { } private module FwdTypeFlowInput implements TypeFlowInput { + predicate enableTypeFlow = Param::enableTypeFlow/0; + predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c) { flowIntoCallApa(call, c, _, _, _, _) } @@ -1845,6 +1849,8 @@ module MakeImpl { } private module RevTypeFlowInput implements TypeFlowInput { + predicate enableTypeFlow = Param::enableTypeFlow/0; + predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c) { flowOutOfCallAp(call, c, _, _, _, _) } @@ -2306,6 +2312,8 @@ module MakeImpl { bindingset[typ, contentType] predicate typecheckStore(Typ typ, DataFlowType contentType) { any() } + + predicate enableTypeFlow() { none() } } private module Stage2 implements StageSig { diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index c1f7965c20d7..14508eb89eea 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -971,6 +971,8 @@ module MakeImplCommon { } signature module TypeFlowInput { + predicate enableTypeFlow(); + /** Holds if the edge is possibly needed in the direction `call` to `c`. */ predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c); @@ -1036,24 +1038,27 @@ module MakeImplCommon { */ pragma[nomagic] private predicate trackedArgTypeCand(ArgNode arg) { - exists(ParamNode p, DataFlowType at, DataFlowType pt | - at = getNodeType(arg) and - pt = getNodeType(p) and - relevantCallEdge(_, _, arg, p) and - typeStrongerThan0(pt, at) - ) - or - exists(ParamNode p, DataFlowType at, DataFlowType pt | - at = getNodeType(arg) and - pt = getNodeType(p) and - paramMustFlow(p, arg) and - relevantCallEdge(_, _, arg, _) and - typeStrongerThan0(at, pt) - ) - or - exists(ParamNode p | - trackedParamTypeCand(p) and - relevantCallEdge(_, _, arg, p) + Input::enableTypeFlow() and + ( + exists(ParamNode p, DataFlowType at, DataFlowType pt | + at = getNodeType(arg) and + pt = getNodeType(p) and + relevantCallEdge(_, _, arg, p) and + typeStrongerThan0(pt, at) + ) + or + exists(ParamNode p, DataFlowType at, DataFlowType pt | + at = getNodeType(arg) and + pt = getNodeType(p) and + paramMustFlow(p, arg) and + relevantCallEdge(_, _, arg, _) and + typeStrongerThan0(at, pt) + ) + or + exists(ParamNode p | + trackedParamTypeCand(p) and + relevantCallEdge(_, _, arg, p) + ) ) } @@ -1276,10 +1281,14 @@ module MakeImplCommon { predicate typeFlowValidEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { Input::relevantCallEdgeIn(call, c) and cc = [true, false] and - forall(ArgNode arg, ParamNode p | - callEdge(call, c, arg, p) and trackedArgType(arg) and paramMustFlow(_, arg) - | - validArgParamIn(arg, p, cc) + ( + not Input::enableTypeFlow() + or + forall(ArgNode arg, ParamNode p | + callEdge(call, c, arg, p) and trackedArgType(arg) and paramMustFlow(_, arg) + | + validArgParamIn(arg, p, cc) + ) ) } @@ -1313,8 +1322,12 @@ module MakeImplCommon { */ predicate typeFlowValidEdgeOut(DataFlowCall call, DataFlowCallable c) { Input::relevantCallEdgeOut(call, c) and - forall(ArgNode arg, ParamNode p | callEdge(call, c, arg, p) and trackedParamType(p) | - validArgParamOut(arg, p) + ( + not Input::enableTypeFlow() + or + forall(ArgNode arg, ParamNode p | callEdge(call, c, arg, p) and trackedParamType(p) | + validArgParamOut(arg, p) + ) ) } } From 13e7e6b98356ccf53662e5fdfc1be9614141441d Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 12 Sep 2023 14:17:23 +0200 Subject: [PATCH 09/15] Dataflow: Improve fwd-taken call edge predicate and improve fwd-to-rev call edge pruning. --- .../codeql/dataflow/internal/DataFlowImpl.qll | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 335fadc6817e..c437f17868b6 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1555,8 +1555,9 @@ module MakeImpl { pragma[nomagic] predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { - exists(ParamNodeEx p, Cc outercc | - fwdFlowIn(call, p, _, outercc, _, _, _, _, _, _, _) and + exists(ParamNodeEx p, Cc outercc, FlowState state, Cc innercc, Typ t, Ap ap | + fwdFlowIn(call, p, state, outercc, innercc, _, _, _, t, ap, _) and + fwdFlow1(p, state, innercc, _, _, _, t, _, ap, _) and c = p.getEnclosingCallable() and if outercc instanceof CcCall then cc = true else cc = false ) @@ -1564,7 +1565,10 @@ module MakeImpl { pragma[nomagic] predicate dataFlowTakenCallEdgeOut(DataFlowCall call, DataFlowCallable c) { - fwdFlowOut(call, c, _, _, _, _, _, _, _, _, _) + exists(NodeEx node, FlowState state, Cc cc, Typ t, Ap ap | + fwdFlowOut(call, c, node, state, cc, _, _, _, t, ap, _) and + fwdFlow1(node, state, cc, _, _, _, t, _, ap, _) + ) } predicate dataFlowNonCallEntry(DataFlowCallable c, boolean cc) { @@ -1585,12 +1589,12 @@ module MakeImpl { private module FwdTypeFlow = TypeFlow; - private predicate flowIntoCallApaValid( + private predicate flowIntoCallApaTaken( DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, ApApprox apa ) { flowIntoCallApa(call, c, arg, p, allowsFieldFlow, apa) and - FwdTypeFlow::typeFlowValidEdgeIn(call, c, _) + FwdTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) } pragma[nomagic] @@ -1692,7 +1696,7 @@ module MakeImpl { exists(ApApprox argApa, Typ argT | returnFlowsThrough(_, _, _, _, pragma[only_bind_into](p), pragma[only_bind_into](argT), pragma[only_bind_into](argAp), ap) and - flowIntoCallApaValid(call, _, pragma[only_bind_into](arg), p, allowsFieldFlow, argApa) and + flowIntoCallApaTaken(call, _, pragma[only_bind_into](arg), p, allowsFieldFlow, argApa) and fwdFlow(arg, _, _, _, _, _, pragma[only_bind_into](argT), pragma[only_bind_into](argAp), argApa) and if allowsFieldFlow = false then argAp instanceof ApNil else any() @@ -1704,7 +1708,7 @@ module MakeImpl { DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, Ap ap ) { exists(ApApprox apa, boolean allowsFieldFlow | - flowIntoCallApaValid(call, c, arg, p, allowsFieldFlow, apa) and + flowIntoCallApaTaken(call, c, arg, p, allowsFieldFlow, apa) and fwdFlow(arg, _, _, _, _, _, _, ap, apa) and if allowsFieldFlow = false then ap instanceof ApNil else any() ) @@ -1721,8 +1725,8 @@ module MakeImpl { pos = ret.getReturnPosition() and if allowsFieldFlow = false then ap instanceof ApNil else any() | - FwdTypeFlow::typeFlowValidEdgeIn(call, c, _) or - FwdTypeFlow::typeFlowValidEdgeOut(call, c) + FwdTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or + FwdTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) ) } From f456bf8d574fc56e4a87f0f14c59b4da85c4b90a Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 12 Sep 2023 14:25:17 +0200 Subject: [PATCH 10/15] Dataflow: Add change note. --- shared/dataflow/change-notes/2023-09-12-typeflow.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 shared/dataflow/change-notes/2023-09-12-typeflow.md diff --git a/shared/dataflow/change-notes/2023-09-12-typeflow.md b/shared/dataflow/change-notes/2023-09-12-typeflow.md new file mode 100644 index 000000000000..e5f010354789 --- /dev/null +++ b/shared/dataflow/change-notes/2023-09-12-typeflow.md @@ -0,0 +1,4 @@ +--- +category: majorAnalysis +--- +* Added support for type-based call edge pruning. This removes data flow call edges that are incompatible with the set of flow paths that reach it based on type information. This improves dispatch precision for constructs like lambdas, `Object.toString()` calls, and the visitor pattern. For now this is only enabled for Java and C#. From b456ba217ae56e0baba0547769fb08ce963f7e21 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 13 Sep 2023 11:40:36 +0200 Subject: [PATCH 11/15] Dataflow: Improve rev-to-fwd call edge pruning. --- .../codeql/dataflow/internal/DataFlowImpl.qll | 175 ++++++++---------- 1 file changed, 76 insertions(+), 99 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index c437f17868b6..8d334a6fe532 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -917,22 +917,46 @@ module MakeImpl { ) } + predicate callEdgeArgParam( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, Ap ap + ) { + flowIntoCallNodeCand1(call, arg, p, allowsFieldFlow) and + c = p.getEnclosingCallable() and + exists(ap) + } + + predicate callEdgeReturn( + DataFlowCall call, DataFlowCallable c, RetNodeEx ret, ReturnKindExt kind, NodeEx out, + boolean allowsFieldFlow, Ap ap + ) { + flowOutOfCallNodeCand1(call, ret, kind, out, allowsFieldFlow) and + c = ret.getEnclosingCallable() and + exists(ap) + } + additional predicate stats( - boolean fwd, int nodes, int fields, int conscand, int states, int tuples + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, int calledges ) { fwd = true and nodes = count(NodeEx node | fwdFlow(node)) and fields = count(Content f0 | fwdFlowConsCand(f0)) and conscand = -1 and states = count(FlowState state | fwdFlowState(state)) and - tuples = count(NodeEx n, boolean b | fwdFlow(n, b)) + tuples = count(NodeEx n, boolean b | fwdFlow(n, b)) and + calledges = -1 or fwd = false and nodes = count(NodeEx node | revFlow(node, _)) and fields = count(Content f0 | revFlowConsCand(f0)) and conscand = -1 and states = count(FlowState state | revFlowState(state)) and - tuples = count(NodeEx n, boolean b | revFlow(n, b)) + tuples = count(NodeEx n, boolean b | revFlow(n, b)) and + calledges = + count(DataFlowCall call, DataFlowCallable c | + callEdgeArgParam(call, c, _, _, _, _) or + callEdgeReturn(call, c, _, _, _, _, _) + ) } /* End: Stage 1 logic. */ } @@ -1093,6 +1117,16 @@ module MakeImpl { ); predicate readStepCand(NodeEx n1, Content c, NodeEx n2); + + predicate callEdgeArgParam( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, Ap ap + ); + + predicate callEdgeReturn( + DataFlowCall call, DataFlowCallable c, RetNodeEx ret, ReturnKindExt kind, NodeEx out, + boolean allowsFieldFlow, Ap ap + ); } private module MkStage { @@ -1172,14 +1206,6 @@ module MakeImpl { Typ t, LocalCc lcc ); - predicate flowOutOfCall( - DataFlowCall call, RetNodeEx ret, ReturnKindExt kind, NodeEx out, boolean allowsFieldFlow - ); - - predicate flowIntoCall( - DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow - ); - bindingset[node, state, t0, ap] predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t); @@ -1202,35 +1228,13 @@ module MakeImpl { PrevStage::revFlow(node) and result = getTyp(node.getDataFlowType()) } - pragma[nomagic] - private predicate flowIntoCallApa( - DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, - boolean allowsFieldFlow, ApApprox apa - ) { - flowIntoCall(call, arg, p, allowsFieldFlow) and - PrevStage::revFlowAp(p, pragma[only_bind_into](apa)) and - PrevStage::revFlowAp(arg, pragma[only_bind_into](apa)) and - c = p.getEnclosingCallable() - } - - pragma[nomagic] - private predicate flowOutOfCallApa( - DataFlowCall call, DataFlowCallable c, RetNodeEx ret, ReturnKindExt kind, NodeEx out, - boolean allowsFieldFlow, ApApprox apa - ) { - flowOutOfCall(call, ret, kind, out, allowsFieldFlow) and - PrevStage::revFlowAp(out, pragma[only_bind_into](apa)) and - PrevStage::revFlowAp(ret, pragma[only_bind_into](apa)) and - c = ret.getEnclosingCallable() - } - pragma[nomagic] private predicate flowThroughOutOfCall( DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, ApApprox argApa, ApApprox apa ) { exists(ReturnKindExt kind | - flowOutOfCallApa(call, _, ret, kind, out, allowsFieldFlow, apa) and + PrevStage::callEdgeReturn(call, _, ret, kind, out, allowsFieldFlow, apa) and PrevStage::callMayFlowThroughRev(call) and PrevStage::returnMayFlowThrough(ret, argApa, apa, kind) and matchesCall(ccc, call) @@ -1434,7 +1438,7 @@ module MakeImpl { DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, ApApprox apa ) { - flowIntoCallApa(call, c, arg, p, allowsFieldFlow, apa) + PrevStage::callEdgeArgParam(call, c, arg, p, allowsFieldFlow, apa) } bindingset[call, ctx] @@ -1499,7 +1503,7 @@ module MakeImpl { DataFlowCall call, DataFlowCallable c, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, ApApprox apa ) { - flowOutOfCallApa(call, c, ret, _, out, allowsFieldFlow, apa) + PrevStage::callEdgeReturn(call, c, ret, _, out, allowsFieldFlow, apa) } bindingset[c, ret, apa, innercc] @@ -1510,7 +1514,7 @@ module MakeImpl { ApApprox apa, CcNoCall innercc ) { viableImplNotCallContextReducedReverse(innercc) and - flowOutOfCallApa(call, c, ret, _, out, allowsFieldFlow, apa) + PrevStage::callEdgeReturn(call, c, ret, _, out, allowsFieldFlow, apa) } pragma[nomagic] @@ -1546,11 +1550,11 @@ module MakeImpl { predicate enableTypeFlow = Param::enableTypeFlow/0; predicate relevantCallEdgeIn(DataFlowCall call, DataFlowCallable c) { - flowIntoCallApa(call, c, _, _, _, _) + PrevStage::callEdgeArgParam(call, c, _, _, _, _) } predicate relevantCallEdgeOut(DataFlowCall call, DataFlowCallable c) { - flowOutOfCallApa(call, c, _, _, _, _, _) + PrevStage::callEdgeReturn(call, c, _, _, _, _, _) } pragma[nomagic] @@ -1593,7 +1597,7 @@ module MakeImpl { DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, ApApprox apa ) { - flowIntoCallApa(call, c, arg, p, allowsFieldFlow, apa) and + PrevStage::callEdgeArgParam(call, c, arg, p, allowsFieldFlow, apa) and FwdTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) } @@ -1720,7 +1724,7 @@ module MakeImpl { Ap ap ) { exists(ApApprox apa, boolean allowsFieldFlow | - flowOutOfCallApa(call, c, ret, _, out, allowsFieldFlow, apa) and + PrevStage::callEdgeReturn(call, c, ret, _, out, allowsFieldFlow, apa) and fwdFlow(ret, _, _, _, _, _, _, ap, apa) and pos = ret.getReturnPosition() and if allowsFieldFlow = false then ap instanceof ApNil else any() @@ -2064,6 +2068,35 @@ module MakeImpl { ) } + predicate callEdgeArgParam( + DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, + boolean allowsFieldFlow, Ap ap + ) { + exists(FlowState state | + flowIntoCallAp(call, c, arg, p, ap) and + revFlow(arg, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and + revFlow(p, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and + allowsFieldFlow = true + | + RevTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or + RevTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) + ) + } + + predicate callEdgeReturn( + DataFlowCall call, DataFlowCallable c, RetNodeEx ret, ReturnKindExt kind, NodeEx out, + boolean allowsFieldFlow, Ap ap + ) { + exists(FlowState state, ReturnPosition pos | + flowOutOfCallAp(call, c, ret, pos, out, ap) and + revFlow(ret, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and + revFlow(out, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and + kind = pos.getKind() and + allowsFieldFlow = true and + RevTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) + ) + } + additional predicate stats( boolean fwd, int nodes, int fields, int conscand, int states, int tuples, int calledges, int tfnodes, int tftuples @@ -2287,10 +2320,6 @@ module MakeImpl { exists(lcc) } - predicate flowOutOfCall = flowOutOfCallNodeCand1/5; - - predicate flowIntoCall = flowIntoCallNodeCand1/4; - pragma[nomagic] private predicate expectsContentCand(NodeEx node) { exists(Content c | @@ -2544,10 +2573,6 @@ module MakeImpl { exists(lcc) } - predicate flowOutOfCall = flowOutOfCallNodeCand2/5; - - predicate flowIntoCall = flowIntoCallNodeCand2/4; - pragma[nomagic] private predicate expectsContentCand(NodeEx node, Ap ap) { exists(Content c | @@ -2639,29 +2664,6 @@ module MakeImpl { exists(lcc) } - pragma[nomagic] - predicate flowOutOfCall( - DataFlowCall call, RetNodeEx node1, ReturnKindExt kind, NodeEx node2, - boolean allowsFieldFlow - ) { - exists(FlowState state | - flowOutOfCallNodeCand2(call, node1, kind, node2, allowsFieldFlow) and - PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and - PrevStage::revFlow(node1, pragma[only_bind_into](state), _) - ) - } - - pragma[nomagic] - predicate flowIntoCall( - DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow - ) { - exists(FlowState state | - flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow) and - PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and - PrevStage::revFlow(node1, pragma[only_bind_into](state), _) - ) - } - pragma[nomagic] private predicate clearSet(NodeEx node, ContentSet c) { PrevStage::revFlow(node) and @@ -2944,29 +2946,6 @@ module MakeImpl { PrevStage::revFlow(node2, pragma[only_bind_into](state2), _) } - pragma[nomagic] - predicate flowOutOfCall( - DataFlowCall call, RetNodeEx node1, ReturnKindExt kind, NodeEx node2, - boolean allowsFieldFlow - ) { - exists(FlowState state | - flowOutOfCallNodeCand2(call, node1, kind, node2, allowsFieldFlow) and - PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and - PrevStage::revFlow(node1, pragma[only_bind_into](state), _) - ) - } - - pragma[nomagic] - predicate flowIntoCall( - DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow - ) { - exists(FlowState state | - flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow) and - PrevStage::revFlow(node2, pragma[only_bind_into](state), _) and - PrevStage::revFlow(node1, pragma[only_bind_into](state), _) - ) - } - bindingset[node, state, t0, ap] predicate filter(NodeEx node, FlowState state, Typ t0, Ap ap, Typ t) { strengthenType(node, t0, t) and @@ -4136,15 +4115,13 @@ module MakeImpl { ) { stage = "1 Fwd" and n = 10 and - Stage1::stats(true, nodes, fields, conscand, states, tuples) and - calledges = -1 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, calledges) and tfnodes = -1 and tftuples = -1 or stage = "1 Rev" and n = 15 and - Stage1::stats(false, nodes, fields, conscand, states, tuples) and - calledges = -1 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, calledges) and tfnodes = -1 and tftuples = -1 or From b63d51811454747c4ee037a1ddbcbc70942224ab Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 13 Sep 2023 12:50:28 +0200 Subject: [PATCH 12/15] Dataflow: Minor perf tweak. --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 8d334a6fe532..4460bba4646e 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1459,8 +1459,7 @@ module MakeImpl { pragma[nomagic] private predicate fwdFlowInCand( DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, - CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, - ApApprox apa + ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { exists(ArgNodeEx arg, boolean allowsFieldFlow | fwdFlow(arg, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and @@ -1471,7 +1470,6 @@ module MakeImpl { ) and flowIntoCallApaInlineLate(call, inner, arg, p, allowsFieldFlow, apa) | - innercc = getCallContextCall(call, inner) and if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1482,9 +1480,9 @@ module MakeImpl { ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { exists(DataFlowCallable inner, boolean cc | - fwdFlowInCand(call, inner, p, state, outercc, innercc, summaryCtx, argT, argAp, t, ap, - apa) and + fwdFlowInCand(call, inner, p, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and + innercc = getCallContextCall(call, inner) and if outercc instanceof CcCall then cc = true else cc = false ) } From 74787bfba88b0c1645cc2f6e43ca13b9467d2613 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Wed, 13 Sep 2023 13:10:15 +0200 Subject: [PATCH 13/15] Dataflow: One more minor perf tweak. --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 4460bba4646e..7d9ef3e2404e 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1517,7 +1517,7 @@ module MakeImpl { pragma[nomagic] private predicate fwdFlowOutCand( - DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, + DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { exists(RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc | @@ -1531,7 +1531,6 @@ module MakeImpl { innercc) ) | - outercc = getCallContextReturn(inner, call) and if allowsFieldFlow = false then ap instanceof ApNil else any() ) } @@ -1540,8 +1539,9 @@ module MakeImpl { DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa ) { - fwdFlowOutCand(call, inner, out, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and - FwdTypeFlow::typeFlowValidEdgeOut(call, inner) + fwdFlowOutCand(call, inner, out, state, summaryCtx, argT, argAp, t, ap, apa) and + FwdTypeFlow::typeFlowValidEdgeOut(call, inner) and + outercc = getCallContextReturn(inner, call) } private module FwdTypeFlowInput implements TypeFlowInput { From b13d02643469b6d965c2a669c57fc64978eb4aa5 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Mon, 18 Sep 2023 13:15:26 +0200 Subject: [PATCH 14/15] Dataflow: Review fixes. --- .../dataflow/internal/DataFlowPrivate.qll | 6 ++- .../codeql/dataflow/internal/DataFlowImpl.qll | 45 +++++++++---------- .../dataflow/internal/DataFlowImplCommon.qll | 30 ++++++++++--- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll index c1c851d18fd3..b77bb173135d 100644 --- a/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll +++ b/csharp/ql/lib/semmle/code/csharp/dataflow/internal/DataFlowPrivate.qll @@ -546,7 +546,11 @@ module LocalFlow { ) or hasNodePath(any(LocalExprStepConfiguration x), node1, node2) and - (node2 instanceof SsaDefinitionExtNode or node2.asExpr() instanceof Cast) + ( + node2 instanceof SsaDefinitionExtNode or + node2.asExpr() instanceof Cast or + node2.asExpr() instanceof AssignExpr + ) or exists(SsaImpl::Definition def | def = getSsaDefinitionExt(node1) and diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 7d9ef3e2404e..3ea0df260591 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1316,7 +1316,7 @@ module MakeImpl { ) or // flow into a callable - fwdFlowIn(_, node, state, _, cc, _, _, _, t, ap, apa) and + fwdFlowIn(_, _, node, state, _, cc, _, _, _, t, ap, apa, _) and if PrevStage::parameterMayFlowThrough(node, apa) then ( summaryCtx = TParamNodeSome(node.asNode()) and @@ -1476,15 +1476,14 @@ module MakeImpl { pragma[nomagic] private predicate fwdFlowIn( - DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, CcCall innercc, - ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa + DataFlowCall call, DataFlowCallable inner, ParamNodeEx p, FlowState state, Cc outercc, + CcCall innercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, + ApApprox apa, boolean cc ) { - exists(DataFlowCallable inner, boolean cc | - fwdFlowInCand(call, inner, p, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and - FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and - innercc = getCallContextCall(call, inner) and - if outercc instanceof CcCall then cc = true else cc = false - ) + fwdFlowInCand(call, inner, p, state, outercc, summaryCtx, argT, argAp, t, ap, apa) and + FwdTypeFlow::typeFlowValidEdgeIn(call, inner, cc) and + innercc = getCallContextCall(call, inner) and + if outercc instanceof CcCall then cc = true else cc = false } bindingset[ctx, result] @@ -1535,6 +1534,7 @@ module MakeImpl { ) } + pragma[nomagic] private predicate fwdFlowOut( DataFlowCall call, DataFlowCallable inner, NodeEx out, FlowState state, CcNoCall outercc, ParamNodeOption summaryCtx, TypOption argT, ApOption argAp, Typ t, Ap ap, ApApprox apa @@ -1557,11 +1557,9 @@ module MakeImpl { pragma[nomagic] predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { - exists(ParamNodeEx p, Cc outercc, FlowState state, Cc innercc, Typ t, Ap ap | - fwdFlowIn(call, p, state, outercc, innercc, _, _, _, t, ap, _) and - fwdFlow1(p, state, innercc, _, _, _, t, _, ap, _) and - c = p.getEnclosingCallable() and - if outercc instanceof CcCall then cc = true else cc = false + exists(ParamNodeEx p, FlowState state, Cc innercc, Typ t, Ap ap | + fwdFlowIn(call, c, p, state, _, innercc, _, _, _, t, ap, _, cc) and + fwdFlow1(p, state, innercc, _, _, _, t, _, ap, _) ) } @@ -1647,8 +1645,8 @@ module MakeImpl { ApOption argAp, ParamNodeEx p, Typ t, Ap ap ) { exists(ApApprox apa | - fwdFlowIn(call, pragma[only_bind_into](p), _, cc, innerCc, summaryCtx, argT, argAp, t, - ap, pragma[only_bind_into](apa)) and + fwdFlowIn(call, _, pragma[only_bind_into](p), _, cc, innerCc, summaryCtx, argT, argAp, + t, ap, pragma[only_bind_into](apa), _) and PrevStage::parameterMayFlowThrough(p, apa) and PrevStage::callMayFlowThroughRev(call) ) @@ -1802,7 +1800,7 @@ module MakeImpl { or // flow out of a callable exists(ReturnPosition pos | - revFlowOut(_, node, pos, state, _, _, ap) and + revFlowOut(_, node, pos, state, _, _, _, ap) and if returnFlowsThrough(node, pos, state, _, _, _, _, ap) then ( returnCtx = TReturnCtxMaybeFlowThrough(pos) and @@ -1867,10 +1865,9 @@ module MakeImpl { pragma[nomagic] predicate dataFlowTakenCallEdgeIn(DataFlowCall call, DataFlowCallable c, boolean cc) { - exists(RetNodeEx ret, ReturnCtx returnCtx | - revFlowOut(call, ret, _, _, returnCtx, _, _) and - c = ret.getEnclosingCallable() and - if returnCtx instanceof TReturnCtxNone then cc = false else cc = true + exists(RetNodeEx ret | + revFlowOut(call, ret, _, _, _, cc, _, _) and + c = ret.getEnclosingCallable() ) } @@ -1925,9 +1922,9 @@ module MakeImpl { pragma[nomagic] private predicate revFlowOut( DataFlowCall call, RetNodeEx ret, ReturnPosition pos, FlowState state, - ReturnCtx returnCtx, ApOption returnAp, Ap ap + ReturnCtx returnCtx, boolean cc, ApOption returnAp, Ap ap ) { - exists(NodeEx out, boolean cc | + exists(NodeEx out | revFlow(out, state, returnCtx, returnAp, ap) and flowOutOfCallApValid(call, ret, pos, out, ap, cc) and if returnCtx instanceof TReturnCtxNone then cc = false else cc = true @@ -1963,7 +1960,7 @@ module MakeImpl { DataFlowCall call, ReturnCtx returnCtx, ApOption returnAp, ReturnPosition pos, Ap ap ) { exists(RetNodeEx ret, FlowState state, CcCall ccc | - revFlowOut(call, ret, pos, state, returnCtx, returnAp, ap) and + revFlowOut(call, ret, pos, state, returnCtx, _, returnAp, ap) and returnFlowsThrough(ret, pos, state, ccc, _, _, _, ap) and matchesCall(ccc, call) ) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll index 14508eb89eea..62cfc3051a45 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImplCommon.qll @@ -1002,7 +1002,7 @@ module MakeImplCommon { * flow between parameter and argument nodes in the cases where it is possible * for a type to first be weakened and then strengthened again. When the * stronger types at the end-points of such a type flow path are incompatible, - * the call relevant call edges can be excluded as impossible. + * the relevant call edges can be excluded as impossible. * * The predicates `relevantCallEdgeIn` and `relevantCallEdgeOut` give the * graph to be explored prior to the recursion, and the other three predicates @@ -1021,7 +1021,7 @@ module MakeImplCommon { } /** - * Holds if a sequence calls may propagates the value of `p` to some + * Holds if a sequence of calls may propagate the value of `p` to some * argument-to-parameter call edge that strengthens the static type. */ pragma[nomagic] @@ -1033,7 +1033,7 @@ module MakeImplCommon { } /** - * Holds if a sequence calls may propagates the value of `arg` to some + * Holds if a sequence of calls may propagate the value of `arg` to some * argument-to-parameter call edge that strengthens the static type. */ pragma[nomagic] @@ -1048,6 +1048,9 @@ module MakeImplCommon { ) or exists(ParamNode p, DataFlowType at, DataFlowType pt | + // A call edge may implicitly strengthen a type by ensuring that a + // specific argument node was reached if the type of that argument was + // strengthened via a cast. at = getNodeType(arg) and pt = getNodeType(p) and paramMustFlow(p, arg) and @@ -1072,6 +1075,10 @@ module MakeImplCommon { DataFlowCall call1, DataFlowCallable c1, ArgNode argOut, DataFlowCall call2, DataFlowCallable c2, ArgNode argIn | + // Data flow may exit `call1` and enter `call2`. If a stronger type is + // known for `argOut`, `argIn` may reach a strengthening, and both are + // determined by the same parameter `p` so we know they're equal, then + // we should track those nodes. trackedParamTypeCand(p) and callEdge(call1, c1, argOut, _) and Input::relevantCallEdgeOut(call1, c1) and @@ -1152,6 +1159,15 @@ module MakeImplCommon { paramMustFlow(_, arg) } + /** + * Gets the strongest of the two types `t1` and `t2`. If neither type is + * stronger then compatibility is checked and `t1` is returned. + */ + bindingset[t1, t2] + DataFlowType getStrongestType(DataFlowType t1, DataFlowType t2) { + if typeStrongerThan(t2, t1) then result = t2 else (compatibleTypes(t1, t2) and result = t1) + } + /** * Holds if `t` is a possible type for an argument reaching the tracked * parameter `p` through an in-going edge in the current data flow stage. @@ -1179,7 +1195,7 @@ module MakeImplCommon { cc = true and typeFlowParamTypeCand(p, t1) and nodeDataFlowType(p, t2) and - if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + t = getStrongestType(t1, t2) ) or exists(ArgNode arg, DataFlowType t1, DataFlowType t2 | @@ -1187,7 +1203,7 @@ module MakeImplCommon { typeFlowArgTypeFromReturn(arg, t1) and paramMustFlow(p, arg) and nodeDataFlowType(p, t2) and - if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + t = getStrongestType(t1, t2) ) or exists(DataFlowCall call | @@ -1208,7 +1224,7 @@ module MakeImplCommon { dataFlowTakenCallEdgeOut(_, _, arg, p) and (if trackedParamType(p) then typeFlowParamType(p, t1, false) else nodeDataFlowType(p, t1)) and nodeDataFlowType(arg, t2) and - if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + t = getStrongestType(t1, t2) ) } @@ -1224,7 +1240,7 @@ module MakeImplCommon { paramMustFlow(p, arg) and typeFlowParamType(p, t1, cc) and nodeDataFlowType(arg, t2) and - if typeStrongerThan(t2, t1) then t = t2 else (compatibleTypes(t1, t2) and t = t1) + t = getStrongestType(t1, t2) ) or cc = [true, false] and From 42054539f4e5b89af7f6e588481f557affb9da92 Mon Sep 17 00:00:00 2001 From: Anders Schack-Mulligen Date: Tue, 19 Sep 2023 12:12:15 +0200 Subject: [PATCH 15/15] Dataflow: Minor review fixes. --- shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll index 3ea0df260591..e4182a32a4bf 100644 --- a/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll +++ b/shared/dataflow/codeql/dataflow/internal/DataFlowImpl.qll @@ -1725,6 +1725,7 @@ module MakeImpl { pos = ret.getReturnPosition() and if allowsFieldFlow = false then ap instanceof ApNil else any() | + // both directions are needed for flow-through FwdTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or FwdTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) ) @@ -1894,6 +1895,7 @@ module MakeImpl { private module RevTypeFlow = TypeFlow; + pragma[nomagic] private predicate flowIntoCallApValid( DataFlowCall call, DataFlowCallable c, ArgNodeEx arg, ParamNodeEx p, Ap ap ) { @@ -1901,6 +1903,7 @@ module MakeImpl { RevTypeFlow::typeFlowValidEdgeOut(call, c) } + pragma[nomagic] private predicate flowOutOfCallApValid( DataFlowCall call, RetNodeEx ret, ReturnPosition pos, NodeEx out, Ap ap, boolean cc ) { @@ -2071,8 +2074,12 @@ module MakeImpl { flowIntoCallAp(call, c, arg, p, ap) and revFlow(arg, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and revFlow(p, pragma[only_bind_into](state), pragma[only_bind_into](ap)) and + // allowsFieldFlow has already been checked in flowIntoCallAp, since + // `Ap` is at least as precise as a boolean from Stage 2 and + // forward, so no need to check it again later. allowsFieldFlow = true | + // both directions are needed for flow-through RevTypeFlowInput::dataFlowTakenCallEdgeIn(call, c, _) or RevTypeFlowInput::dataFlowTakenCallEdgeOut(call, c) )