diff --git a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp index 109faacf1726a..e989f61f258b1 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ArrayBoundChecker.cpp @@ -34,24 +34,37 @@ using namespace taint; using llvm::formatv; namespace { -/// If `E` is a "clean" array subscript expression, return the type of the -/// accessed element. If the base of the subscript expression is modified by -/// pointer arithmetic (and not the beginning of a "full" memory region), this -/// always returns nullopt because that's the right (or the least bad) thing to -/// do for the diagnostic output that's relying on this. -static std::optional determineElementType(const Expr *E, - const CheckerContext &C) { +/// If `E` is an array subscript expression with a base that is "clean" (= not +/// modified by pointer arithmetic = the beginning of a memory region), return +/// it as a pointer to ArraySubscriptExpr; otherwise return nullptr. +/// This helper function is used by two separate heuristics that are only valid +/// in these "clean" cases. +static const ArraySubscriptExpr * +getAsCleanArraySubscriptExpr(const Expr *E, const CheckerContext &C) { const auto *ASE = dyn_cast(E); if (!ASE) - return std::nullopt; + return nullptr; const MemRegion *SubscriptBaseReg = C.getSVal(ASE->getBase()).getAsRegion(); if (!SubscriptBaseReg) - return std::nullopt; + return nullptr; // The base of the subscript expression is affected by pointer arithmetics, - // so we want to report byte offsets instead of indices. + // so we want to report byte offsets instead of indices and we don't want to + // activate the "index is unsigned -> cannot be negative" shortcut. if (isa(SubscriptBaseReg->StripCasts())) + return nullptr; + + return ASE; +} + +/// If `E` is a "clean" array subscript expression, return the type of the +/// accessed element; otherwise return std::nullopt because that's the best (or +/// least bad) option for the diagnostic generation that relies on this. +static std::optional determineElementType(const Expr *E, + const CheckerContext &C) { + const auto *ASE = getAsCleanArraySubscriptExpr(E, C); + if (!ASE) return std::nullopt; return ASE->getType(); @@ -140,7 +153,9 @@ class ArrayBoundChecker : public Checker, ProgramStateRef ErrorState, NonLoc Val, bool MarkTaint); - static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC); + static bool isFromCtypeMacro(const Expr *E, ASTContext &AC); + + static bool isOffsetObviouslyNonnegative(const Expr *E, CheckerContext &C); static bool isIdiomaticPastTheEndPtr(const Expr *E, ProgramStateRef State, NonLoc Offset, NonLoc Limit, @@ -588,20 +603,48 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { State, ByteOffset, SVB.makeZeroArrayIndex(), SVB); if (PrecedesLowerBound) { - // The offset may be invalid (negative)... - if (!WithinLowerBound) { - // ...and it cannot be valid (>= 0), so report an error. - Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); - reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); - return; + // The analyzer thinks that the offset may be invalid (negative)... + + if (isOffsetObviouslyNonnegative(E, C)) { + // ...but the offset is obviously non-negative (clear array subscript + // with an unsigned index), so we're in a buggy situation. + + // TODO: Currently the analyzer ignores many casts (e.g. signed -> + // unsigned casts), so it can easily reach states where it will load a + // signed (and negative) value from an unsigned variable. This sanity + // check is a duct tape "solution" that silences most of the ugly false + // positives that are caused by this buggy behavior. Note that this is + // not a complete solution: this cannot silence reports where pointer + // arithmetic complicates the picture and cannot ensure modeling of the + // "unsigned index is positive with highest bit set" cases which are + // "usurped" by the nonsense "unsigned index is negative" case. + // For more information about this topic, see the umbrella ticket + // https://github.com/llvm/llvm-project/issues/39492 + // TODO: Remove this hack once 'SymbolCast's are modeled properly. + + if (!WithinLowerBound) { + // The state is completely nonsense -- let's just sink it! + C.addSink(); + return; + } + // Otherwise continue on the 'WithinLowerBound' branch where the + // unsigned index _is_ non-negative. Don't mention this assumption as a + // note tag, because it would just confuse the users! + } else { + if (!WithinLowerBound) { + // ...and it cannot be valid (>= 0), so report an error. + Messages Msgs = getPrecedesMsgs(Reg, ByteOffset); + reportOOB(C, PrecedesLowerBound, Msgs, ByteOffset, std::nullopt); + return; + } + // ...but it can be valid as well, so the checker will (optimistically) + // assume that it's valid and mention this in the note tag. + SUR.recordNonNegativeAssumption(); } - // ...but it can be valid as well, so the checker will (optimistically) - // assume that it's valid and mention this in the note tag. - SUR.recordNonNegativeAssumption(); } // Actually update the state. The "if" only fails in the extremely unlikely - // case when compareValueToThreshold returns {nullptr, nullptr} becasue + // case when compareValueToThreshold returns {nullptr, nullptr} because // evalBinOpNN fails to evaluate the less-than operator. if (WithinLowerBound) State = WithinLowerBound; @@ -661,7 +704,7 @@ void ArrayBoundChecker::performCheck(const Expr *E, CheckerContext &C) const { } // Actually update the state. The "if" only fails in the extremely unlikely - // case when compareValueToThreshold returns {nullptr, nullptr} becasue + // case when compareValueToThreshold returns {nullptr, nullptr} because // evalBinOpNN fails to evaluate the less-than operator. if (WithinUpperBound) State = WithinUpperBound; @@ -726,8 +769,8 @@ void ArrayBoundChecker::reportOOB(CheckerContext &C, ProgramStateRef ErrorState, C.emitReport(std::move(BR)); } -bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { - SourceLocation Loc = S->getBeginLoc(); +bool ArrayBoundChecker::isFromCtypeMacro(const Expr *E, ASTContext &ACtx) { + SourceLocation Loc = E->getBeginLoc(); if (!Loc.isMacroID()) return false; @@ -745,6 +788,14 @@ bool ArrayBoundChecker::isFromCtypeMacro(const Stmt *S, ASTContext &ACtx) { (MacroName == "isupper") || (MacroName == "isxdigit")); } +bool ArrayBoundChecker::isOffsetObviouslyNonnegative(const Expr *E, + CheckerContext &C) { + const ArraySubscriptExpr *ASE = getAsCleanArraySubscriptExpr(E, C); + if (!ASE) + return false; + return ASE->getIdx()->getType()->isUnsignedIntegerOrEnumerationType(); +} + bool ArrayBoundChecker::isInAddressOf(const Stmt *S, ASTContext &ACtx) { ParentMapContext &ParentCtx = ACtx.getParentMapContext(); do { diff --git a/clang/test/Analysis/out-of-bounds.c b/clang/test/Analysis/out-of-bounds.c index 7a094b8fdc840..7d6cb4ecf1b24 100644 --- a/clang/test/Analysis/out-of-bounds.c +++ b/clang/test/Analysis/out-of-bounds.c @@ -188,29 +188,31 @@ int test_cast_to_unsigned(signed char x) { if (x >= 0) return x; // FIXME: Here the analyzer ignores the signed -> unsigned cast, and manages to - // load a negative value from an unsigned variable. This causes an underflow - // report, which is an ugly false positive. + // load a negative value from an unsigned variable. // The underlying issue is tracked by Github ticket #39492. clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }} - return table[y]; // expected-warning {{Out of bound access to memory preceding}} + // However, a hack in the ArrayBound checker suppresses the false positive + // underflow report that would be generated here. + return table[y]; // no-warning } int test_cast_to_unsigned_overflow(signed char x) { unsigned char y = x; if (x >= 0) return x; - // A variant of 'test_cast_to_unsigned' where the correct behavior would be - // an overflow report (because the negative values are cast to `unsigned - // char` values that are too large). - // FIXME: See comment in 'test_cast_to_unsigned'. + // FIXME: As in 'test_cast_to_unsigned', the analyzer thinks that this + // unsigned variable contains a negative value. clang_analyzer_value(y); // expected-warning {{8s:{ [-128, -1] } }} - return small_table[y]; // expected-warning {{Out of bound access to memory preceding}} + // FIXME: The following subscript expression should produce an overflow + // report (because negative signed char corresponds to unsigned char >= 128); + // but the hack in ArrayBound just silences reports and cannot "restore" the + // real execution paths. + return small_table[y]; // no-warning } int test_negative_offset_with_unsigned_idx(void) { // An example where the subscript operator uses an unsigned index, but the - // underflow report is still justified. (We should try to keep this if we - // silence false positives like the one in 'test_cast_to_unsigned'.) + // underflow report is still justified. int *p = table - 10; unsigned idx = 2u; return p[idx]; // expected-warning {{Out of bound access to memory preceding}}