diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h index 04eacd1df7ffe2..2625e94d07d4a8 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h @@ -121,6 +121,34 @@ struct EvalCallOptions { EvalCallOptions() {} }; +/// Simple control flow statements like `if` can only produce a single two-way +/// state split, so when the analyzer cannot determine the value of the +/// condition, it can assume either of the two options, because the fact that +/// they are in the source code implies that the programmer thought that they +/// are possible (at least under some conditions). +/// (Note that this heuristic is not entirely correct when there are _several_ +/// `if` statements with unmarked logical connections between them, but it's +/// still good enough and the analyzer heavily relies on it.) +/// In contrast with this, a single loop statement can produce multiple state +/// splits, and we cannot always single out safe assumptions where we can say +/// that "the programmer included this loop in the source code, so they clearly +/// thought that this execution path is possible". +/// However, the analyzer wants to explore the code in and after the loop, so +/// it makes assumptions about the loop condition (to get a concrete execution +/// path) even when they are not justified. +/// This function is called by the engine to mark the `State` when it makes an +/// assumption which is "weak". Checkers may use this heuristical mark to +/// discard the result and reduce the amount of false positives. +/// TODO: Instead of just marking these branches for checker-specific handling, +/// we could discard them completely. I suspect that this could eliminate some +/// false positives without suppressing too many true positives, but I didn't +/// have time to measure its effects. +ProgramStateRef recordWeakLoopAssumption(ProgramStateRef State); + +/// Returns true if `recordWeakLoopAssumption()` was called on the execution +/// path which produced `State`. +bool seenWeakLoopAssumption(ProgramStateRef State); + class ExprEngine { void anchor(); @@ -322,13 +350,14 @@ class ExprEngine { ExplodedNode *Pred); /// ProcessBranch - Called by CoreEngine. Used to generate successor - /// nodes by processing the 'effects' of a branch condition. - void processBranch(const Stmt *Condition, - NodeBuilderContext& BuilderCtx, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const CFGBlock *DstT, - const CFGBlock *DstF); + /// nodes by processing the 'effects' of a branch condition. + /// If the branch condition is a loop condition, IterationsFinishedInLoop is + /// the number of already finished iterations (0, 1, 2, ...); otherwise it's + /// std::nullopt. + void processBranch(const Stmt *Condition, NodeBuilderContext &BuilderCtx, + ExplodedNode *Pred, ExplodedNodeSet &Dst, + const CFGBlock *DstT, const CFGBlock *DstF, + std::optional IterationsFinishedInLoop); /// Called by CoreEngine. /// Used to generate successor nodes for temporary destructors depending @@ -583,11 +612,11 @@ class ExprEngine { ExplodedNode *Pred, ExplodedNodeSet &Dst); - /// evalEagerlyAssumeBinOpBifurcation - Given the nodes in 'Src', eagerly assume symbolic - /// expressions of the form 'x != 0' and generate new nodes (stored in Dst) - /// with those assumptions. - void evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src, - const Expr *Ex); + /// evalEagerlyAssumeOpBifurcation - Given the nodes in 'Src', eagerly assume + /// symbolic expressions of the form 'x != 0' or '!x' and generate new nodes + /// (stored in Dst) with those assumptions. + void evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst, + ExplodedNodeSet &Src, const Expr *Ex); static std::pair geteagerlyAssumeBinOpBifurcationTags(); diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp index 3f837564cf47c4..da9ae1c749a3db 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp @@ -697,6 +697,11 @@ void ArrayBoundCheckerV2::reportOOB(CheckerContext &C, ProgramStateRef ErrorState, Messages Msgs, NonLoc Offset, std::optional Extent, bool IsTaintBug /*=false*/) const { + // Suppress results found through execution paths where in some loop the + // analyzer arbitrarily assumed either that the loop is skipped (0 iterations) + // or that 3 or more iterations are executed. + if (seenWeakLoopAssumption(ErrorState)) + return; ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState); if (!ErrorNode) diff --git a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp index 8605fa149e4f52..54a6f87dcc8be5 100644 --- a/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/CoreEngine.cpp @@ -441,10 +441,32 @@ void CoreEngine::HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred) { void CoreEngine::HandleBranch(const Stmt *Cond, const Stmt *Term, const CFGBlock * B, ExplodedNode *Pred) { assert(B->succ_size() == 2); + + const LocationContext *LC = Pred->getLocationContext(); + BlockCounter Counter = WList->getBlockCounter(); + unsigned BlockCount = + Counter.getNumVisited(LC->getStackFrame(), B->getBlockID()); + std::optional IterationsFinishedInLoop = std::nullopt; + if (isa(Term)) { + // FIXME: This code approximates the number of finished iterations based on + // the block count, i.e. the number of evaluations of the terminator block + // on the current execution path (which includes the current evaluation, so + // is always >= 1). This is probably acceptable for the checker-specific + // false positive suppression that currently uses this value, but it would + // be better to calcuate an accurate count of iterations. + assert(BlockCount >= 1); + IterationsFinishedInLoop = BlockCount - 1; + } else if (isa(Term)) { + // FIXME: The fixme note in the previous branch also applies here. + // In a do-while loop one iteration happens before the first evaluation of + // the loop condition, so we don't subtract one from the block count. + IterationsFinishedInLoop = BlockCount; + } + NodeBuilderContext Ctx(*this, B, Pred); ExplodedNodeSet Dst; ExprEng.processBranch(Cond, Ctx, Pred, Dst, *(B->succ_begin()), - *(B->succ_begin() + 1)); + *(B->succ_begin() + 1), IterationsFinishedInLoop); // Enqueue the new frontier onto the worklist. enqueue(Dst); } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index fdabba46992b08..cdd557c50a0126 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -212,6 +212,24 @@ typedef llvm::ImmutableMap REGISTER_TRAIT_WITH_PROGRAMSTATE(PendingArrayDestruction, PendingArrayDestructionMap) +// This trait is used to heuristically filter out results produced from +// execution paths that took "weak" assumptions within a loop. +REGISTER_TRAIT_WITH_PROGRAMSTATE(SeenWeakLoopAssumption, bool) + +ProgramStateRef clang::ento::recordWeakLoopAssumption(ProgramStateRef State) { + return State->set(true); +} + +bool clang::ento::seenWeakLoopAssumption(ProgramStateRef State) { + return State->get(); +} + +// This trait points to the last expression (logical operator) where an eager +// assumption introduced a state split (i.e. both cases were feasible). This is +// used by the WeakLoopAssumption heuristic to find situations where an eager +// assumption introduces a state split in the evaluation of a loop condition. +REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeAssumptionAt, const Expr *) + //===----------------------------------------------------------------------===// // Engine construction and deletion. //===----------------------------------------------------------------------===// @@ -2128,7 +2146,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, (B->isRelationalOp() || B->isEqualityOp())) { ExplodedNodeSet Tmp; VisitBinaryOperator(cast(S), Pred, Tmp); - evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, cast(S)); + evalEagerlyAssumeOpBifurcation(Dst, Tmp, cast(S)); } else VisitBinaryOperator(cast(S), Pred, Dst); @@ -2401,7 +2419,7 @@ void ExprEngine::Visit(const Stmt *S, ExplodedNode *Pred, if (AMgr.options.ShouldEagerlyAssume && (U->getOpcode() == UO_LNot)) { ExplodedNodeSet Tmp; VisitUnaryOperator(U, Pred, Tmp); - evalEagerlyAssumeBinOpBifurcation(Dst, Tmp, U); + evalEagerlyAssumeOpBifurcation(Dst, Tmp, U); } else VisitUnaryOperator(U, Pred, Dst); @@ -2761,12 +2779,10 @@ assumeCondition(const Stmt *Condition, ExplodedNode *N) { return State->assume(V); } -void ExprEngine::processBranch(const Stmt *Condition, - NodeBuilderContext& BldCtx, - ExplodedNode *Pred, - ExplodedNodeSet &Dst, - const CFGBlock *DstT, - const CFGBlock *DstF) { +void ExprEngine::processBranch( + const Stmt *Condition, NodeBuilderContext &BldCtx, ExplodedNode *Pred, + ExplodedNodeSet &Dst, const CFGBlock *DstT, const CFGBlock *DstF, + std::optional IterationsFinishedInLoop) { assert((!Condition || !isa(Condition)) && "CXXBindTemporaryExprs are handled by processBindTemporary."); const LocationContext *LCtx = Pred->getLocationContext(); @@ -2804,31 +2820,59 @@ void ExprEngine::processBranch(const Stmt *Condition, ProgramStateRef PrevState = PredN->getState(); ProgramStateRef StTrue, StFalse; - if (const auto KnownCondValueAssumption = assumeCondition(Condition, PredN)) + StTrue = StFalse = PrevState; + + if (const auto KnownCondValueAssumption = + assumeCondition(Condition, PredN)) { std::tie(StTrue, StFalse) = *KnownCondValueAssumption; - else { - assert(!isa(Condition)); - builder.generateNode(PrevState, true, PredN); - builder.generateNode(PrevState, false, PredN); - continue; + + if (!StTrue) + builder.markInfeasible(true); + + if (!StFalse) + builder.markInfeasible(false); } - if (StTrue && StFalse) + bool BothFeasible = builder.isFeasible(true) && builder.isFeasible(false); + + if (BothFeasible) assert(!isa(Condition)); + const Expr *EagerlyAssumeExpr = + PrevState->get(); + bool DidEagerlyAssume = EagerlyAssumeExpr == dyn_cast(Condition); + // Process the true branch. if (builder.isFeasible(true)) { - if (StTrue) - builder.generateNode(StTrue, true, PredN); - else - builder.markInfeasible(true); + if ((BothFeasible || DidEagerlyAssume) && IterationsFinishedInLoop && + *IterationsFinishedInLoop >= 2) { + // When programmers write a loop, they imply that at least two + // iterations are possible (otherwise they would just write an `if`), + // but the third iteration is not implied: there are situations where + // the programmer knows that there won't be a third iteration, but + // this is not marked in the source code. (For example, the ffmpeg + // project has 2-element arrays which are accessed from loops where + // the number of steps is opaque and the analyzer cannot deduce that + // there are <= 2 iterations.) + // Checkers may use this heuristic mark to discard results found on + // branches that contain this "weak" assumption. + StTrue = recordWeakLoopAssumption(StTrue); + } + builder.generateNode(StTrue, true, PredN); } // Process the false branch. if (builder.isFeasible(false)) { - if (StFalse) - builder.generateNode(StFalse, false, PredN); - else - builder.markInfeasible(false); + if ((BothFeasible || DidEagerlyAssume) && IterationsFinishedInLoop && + *IterationsFinishedInLoop == 0) { + // There are many situations where the programmers know that there + // will be at least one iteration in a loop (e.g. a structure is not + // empty) but the analyzer cannot deduce this and reports false + // positives after skipping the loop. + // Checkers may use this heuristic mark to discard results found on + // branches that contain this "weak" assumption. + StFalse = recordWeakLoopAssumption(StFalse); + } + builder.generateNode(StFalse, false, PredN); } } currBldrCtx = nullptr; @@ -3752,9 +3796,9 @@ ExprEngine::geteagerlyAssumeBinOpBifurcationTags() { &eagerlyAssumeBinOpBifurcationFalse); } -void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, - ExplodedNodeSet &Src, - const Expr *Ex) { +void ExprEngine::evalEagerlyAssumeOpBifurcation(ExplodedNodeSet &Dst, + ExplodedNodeSet &Src, + const Expr *Ex) { StmtNodeBuilder Bldr(Src, Dst, *currBldrCtx); for (const auto Pred : Src) { @@ -3776,6 +3820,11 @@ void ExprEngine::evalEagerlyAssumeBinOpBifurcation(ExplodedNodeSet &Dst, ProgramStateRef StateTrue, StateFalse; std::tie(StateTrue, StateFalse) = state->assume(*SEV); + if (StateTrue && StateFalse) { + StateTrue = StateTrue->set(Ex); + StateFalse = StateFalse->set(Ex); + } + // First assume that the condition is true. if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); diff --git a/clang/test/Analysis/loop-unrolling.cpp b/clang/test/Analysis/loop-unrolling.cpp index 66a828abfb5133..1d58ba171c0856 100644 --- a/clang/test/Analysis/loop-unrolling.cpp +++ b/clang/test/Analysis/loop-unrolling.cpp @@ -349,7 +349,7 @@ int simple_unknown_bound_loop() { #ifdef DFS clang_analyzer_numTimesReached(); // expected-warning {{16}} #else - clang_analyzer_numTimesReached(); // expected-warning {{8}} + clang_analyzer_numTimesReached(); // expected-warning {{10}} #endif } return 0; @@ -369,9 +369,9 @@ int nested_inlined_no_unroll1() { int k; for (int i = 0; i < 9; i++) { #ifdef DFS - clang_analyzer_numTimesReached(); // expected-warning {{18}} + clang_analyzer_numTimesReached(); // expected-warning {{20}} #else - clang_analyzer_numTimesReached(); // expected-warning {{14}} + clang_analyzer_numTimesReached(); // expected-warning {{18}} #endif k = simple_unknown_bound_loop(); // reevaluation without inlining, splits the state as well } diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 1f771c2b3bd138..fde53d148ba442 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -1,4 +1,9 @@ -// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -verify=expected,eagerlyassume %s +// RUN: %clang_analyze_cc1 -Wno-array-bounds -analyzer-checker=core,alpha.security.ArrayBoundV2,debug.ExprInspection -analyzer-config eagerly-assume=false -verify %s + +// Note that eagerly-assume=false is tested separately because the +// WeakLoopAssumption suppression heuristic uses different code paths to +// achieve the same result with and without eagerly-assume. void clang_analyzer_eval(int); @@ -194,3 +199,146 @@ char test_comparison_with_extent_symbol(struct incomplete *p) { return ((char *)p)[-1]; // no-warning } +// WeakLoopAssumption suppression +/////////////////////////////////////////////////////////////////////// + +int GlobalArray[100]; +int loop_suppress_after_zero_iterations(unsigned len) { + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + // Previously this would have produced an overflow warning because splitting + // the state on the loop condition introduced an execution path where the + // analyzer thinks that len == 0. + // There are many situations where the programmer knows that an argument is + // positive, but this is not indicated in the source code, so we must avoid + // reporting errors (especially out of bounds errors) on these branches, + // because otherwise we'd get prohibitively many false positives. + return GlobalArray[len - 1]; // no-warning +} + +void loop_report_in_second_iteration(int len) { + int buf[1] = {0}; + for (int i = 0; i < len; i++) { + // When a programmer writes a loop, we may assume that they intended at + // least two iterations. + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // We should suppress array bounds errors on the third and later iterations + // of loops, because sometimes programmers write a loop in sitiuations + // where they know that there will be at most two iterations, but the + // analyzer cannot deduce this property. + buf[i] = 1; // no-warning + } +} + +int no_suppression_when_no_assumption_zero_iterations(unsigned len) { + if (len != 0) { + // This 'if' introduces a state split between len == 0 and len != 0. + } + + // On the branch where we assumed that len is zero, this loop will be + // skipped. We (intentionally) don't suppress this execution path becasue + // here the analyzer doesn't assume anything new when it evaluates the loop + // condition. + for (unsigned i = 0; i < len; i++) + if (GlobalArray[i] > 0) + return GlobalArray[i]; + + return GlobalArray[len - 1]; // expected-warning{{Out of bound access to memory}} +} + +void no_suppression_when_no_assumption_third_iteration(int len) { + if (len < 20) { + // This 'if' introduces a state split with len >= 20 on one branch. + } + + int buf[2] = {0}; + for (int i = 0; i < len; i++) { + // As in no_suppression_when_no_assumption_zero_iterations, the suppression + // only activates when the analyzer assumes something new in the loop + // condition. On the branch where `len >= 20` entering the third iteration + // doesn't involve a new assumption, so this warning is not suppressed: + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration_logical_and(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; i < len && flag; i++) { + // FIXME: In this case the checker should suppress the warning the same way + // as it's suppressed in loop_suppress_in_third_iteration, but the + // suppression is not activated because the terminator statement associated + // with the loop is just the expression 'flag', while 'i < len' is a + // separate terminator statement that's associated with the + // short-circuiting operator '&&'. + // I have seen a real-world FP that looks like this, but it is much rarer + // than the basic setup. + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } +} + +void loop_suppress_in_third_iteration_logical_and_2(int len, int flag) { + int buf[2] = {0}; + for (int i = 0; flag && i < len; i++) { + // If the two operands of '&&' are flipped, the suppression works, because + // then 'flag' is the terminator statement associated with '&&' (which + // determines whether the short-circuiting happens or not) and 'i < len' is + // the terminator statement of the loop itself. + buf[i] = 1; // no-warning + } +} + +void loop_suppress_in_third_iteration_cast(int len) { + int buf[2] = {0}; + for (int i = 0; (unsigned)(i < len); i++) { + // The behavior of this suppression is slightly different under + // eagerly-assume=true (the default) and eagerly-assume=false: + // * When eager assumptions are disabled, it's enough to look for cases + // where we get two non-null states from splitting the state over the + // 'SVal' that represents the full loop condition. + // * When eager assumptions are enabled, we also accept situations when the + // loop condition expression triggers an eager state split and therefore + // we won't see a state split at the "normal" point because it's executed + // on two already separated execution paths. + // However, for the sake of simplicity we don't activate the suppression in + // cases when _a subexpression_ of the loop condition triggers an eager + // assumption. There are already many differences between analysis with and + // without eager assumptions, so it would be pointless to write more + // complicated code to eliminate these rare differences. + buf[i] = 1; // eagerlyassume-warning{{Out of bound access to memory}} + } +} + +int coinflip(void); +int do_while_report_after_one_iteration(void) { + int i = 0; + do { + i++; + } while (coinflip()); + // Unlike `loop_suppress_after_zero_iterations`, running just one iteration + // in a do-while is not a corner case that would produce too many false + // positives, so don't suppress bounds errors in these situations. + return GlobalArray[i-2]; // expected-warning{{Out of bound access to memory}} +} + +void do_while_report_in_second_iteration(int len) { + int buf[1] = {0}; + int i = 0; + do { + buf[i] = 1; // expected-warning{{Out of bound access to memory}} + } while (i++ < len); +} + +void do_while_suppress_in_third_iteration(int len) { + int buf[2] = {0}; + int i = 0; + do { + buf[i] = 1; // no-warning + } while (i++ < len); +}