-
Notifications
You must be signed in to change notification settings - Fork 12.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Wunsafe-buffer-usage] False positives for & expression indexing constant size array (arr[anything & 0]) #112284
[Wunsafe-buffer-usage] False positives for & expression indexing constant size array (arr[anything & 0]) #112284
Conversation
@llvm/pr-subscribers-clang Author: Malavika Samak (malavikasamak) ChangesDo not warn when a constant sized array is indexed with an expression that contains bitwise and operation (rdar://136684050) Full diff: https://github.com/llvm/llvm-project/pull/112284.diff 2 Files Affected:
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 97f1c4f16b8f4c..cdfdcc17536391 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -427,6 +427,45 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
// - e. g. "Try harder to find a NamedDecl to point at in the note."
// already duplicated
// - call both from Sema and from here
+ std::function<bool(const Expr *exp, const ConstantArrayType *CATy,
+ unsigned int limit)>
+ SafeMaskedAccess;
+ unsigned int RecLimit = 5;
+ llvm::APInt Max;
+ bool Initialized = false;
+
+ SafeMaskedAccess = [&](const Expr *exp, const ConstantArrayType *CATy,
+ unsigned int RecLimit) -> bool {
+ if (RecLimit == 0)
+ return false;
+
+ RecLimit--;
+
+ if (const auto *IntLit = dyn_cast<IntegerLiteral>(exp)) {
+ const APInt ArrIdx = IntLit->getValue();
+ if (ArrIdx.isNonNegative() &&
+ ArrIdx.getLimitedValue() < CATy->getLimitedSize())
+ return true;
+ if (!Initialized) {
+ Max = ArrIdx;
+ Initialized = true;
+ } else {
+ Max = Max & ArrIdx.getLimitedValue();
+ }
+ if (Max.getLimitedValue() < CATy->getLimitedSize())
+ return true;
+ }
+
+ if (const auto *BinEx = dyn_cast<BinaryOperator>(exp)) {
+ if (SafeMaskedAccess(BinEx->getLHS()->IgnoreParenCasts(), CATy, RecLimit))
+ return true;
+ else if (SafeMaskedAccess(BinEx->getRHS()->IgnoreParenCasts(), CATy,
+ RecLimit))
+ return true;
+ }
+
+ return false;
+ };
const auto *BaseDRE =
dyn_cast<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
@@ -446,6 +485,12 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
if (ArrIdx.isNonNegative() &&
ArrIdx.getLimitedValue() < CATy->getLimitedSize())
return true;
+ } else if (const auto *BinEx = dyn_cast<BinaryOperator>(Node.getIdx())) {
+ if (BinEx->getOpcode() != BO_And)
+ return false;
+
+ Max.setAllBits();
+ return SafeMaskedAccess(Node.getIdx(), CATy, RecLimit);
}
return false;
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
index 8b2f103ec66708..e5a5ca57f66883 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
@@ -33,6 +33,20 @@ void constant_idx_safe0(unsigned idx) {
buffer[0] = 0;
}
+int array[10]; // expected-warning{{'array' is an unsafe buffer that does not perform bounds checks}}
+
+void masked_idx(unsigned long long idx) {
+ array[idx & 5] = 10; // no warning
+ array[idx & 11 & 5] = 3; // no warning
+ array[idx & 11] = 20; // expected-note{{used in buffer access here}}
+}
+
+int array2[5];
+
+void mased_idx_false() {
+ array2[6 & 5];
+}
+
void constant_idx_unsafe(unsigned idx) {
int buffer[10]; // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}}
// expected-note@-1{{change type of 'buffer' to 'std::array' to label it for hardening}}
|
@llvm/pr-subscribers-clang-analysis Author: Malavika Samak (malavikasamak) ChangesDo not warn when a constant sized array is indexed with an expression that contains bitwise and operation (rdar://136684050) Full diff: https://github.com/llvm/llvm-project/pull/112284.diff 2 Files Affected:
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 97f1c4f16b8f4c..cdfdcc17536391 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -427,6 +427,45 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
// - e. g. "Try harder to find a NamedDecl to point at in the note."
// already duplicated
// - call both from Sema and from here
+ std::function<bool(const Expr *exp, const ConstantArrayType *CATy,
+ unsigned int limit)>
+ SafeMaskedAccess;
+ unsigned int RecLimit = 5;
+ llvm::APInt Max;
+ bool Initialized = false;
+
+ SafeMaskedAccess = [&](const Expr *exp, const ConstantArrayType *CATy,
+ unsigned int RecLimit) -> bool {
+ if (RecLimit == 0)
+ return false;
+
+ RecLimit--;
+
+ if (const auto *IntLit = dyn_cast<IntegerLiteral>(exp)) {
+ const APInt ArrIdx = IntLit->getValue();
+ if (ArrIdx.isNonNegative() &&
+ ArrIdx.getLimitedValue() < CATy->getLimitedSize())
+ return true;
+ if (!Initialized) {
+ Max = ArrIdx;
+ Initialized = true;
+ } else {
+ Max = Max & ArrIdx.getLimitedValue();
+ }
+ if (Max.getLimitedValue() < CATy->getLimitedSize())
+ return true;
+ }
+
+ if (const auto *BinEx = dyn_cast<BinaryOperator>(exp)) {
+ if (SafeMaskedAccess(BinEx->getLHS()->IgnoreParenCasts(), CATy, RecLimit))
+ return true;
+ else if (SafeMaskedAccess(BinEx->getRHS()->IgnoreParenCasts(), CATy,
+ RecLimit))
+ return true;
+ }
+
+ return false;
+ };
const auto *BaseDRE =
dyn_cast<DeclRefExpr>(Node.getBase()->IgnoreParenImpCasts());
@@ -446,6 +485,12 @@ AST_MATCHER(ArraySubscriptExpr, isSafeArraySubscript) {
if (ArrIdx.isNonNegative() &&
ArrIdx.getLimitedValue() < CATy->getLimitedSize())
return true;
+ } else if (const auto *BinEx = dyn_cast<BinaryOperator>(Node.getIdx())) {
+ if (BinEx->getOpcode() != BO_And)
+ return false;
+
+ Max.setAllBits();
+ return SafeMaskedAccess(Node.getIdx(), CATy, RecLimit);
}
return false;
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
index 8b2f103ec66708..e5a5ca57f66883 100644
--- a/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-array.cpp
@@ -33,6 +33,20 @@ void constant_idx_safe0(unsigned idx) {
buffer[0] = 0;
}
+int array[10]; // expected-warning{{'array' is an unsafe buffer that does not perform bounds checks}}
+
+void masked_idx(unsigned long long idx) {
+ array[idx & 5] = 10; // no warning
+ array[idx & 11 & 5] = 3; // no warning
+ array[idx & 11] = 20; // expected-note{{used in buffer access here}}
+}
+
+int array2[5];
+
+void mased_idx_false() {
+ array2[6 & 5];
+}
+
void constant_idx_unsafe(unsigned idx) {
int buffer[10]; // expected-warning{{'buffer' is an unsafe buffer that does not perform bounds checks}}
// expected-note@-1{{change type of 'buffer' to 'std::array' to label it for hardening}}
|
b557d1d
to
81af812
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
llvm::APInt LHS = SafeMaskedAccess(BinEx->getLHS()->IgnoreParenCasts(), RecLimit); | ||
llvm::APInt RHS = SafeMaskedAccess(BinEx->getRHS()->IgnoreParenCasts(), RecLimit); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm somewhat worried about IgnoreParenCasts()
because it disrespects the type system. You can get values of the wrong type if you skip it like this, or worse, run into sign extension issues. IgnoreParens()
is probably fine but casts should probably be handled as a separate recursive case where you manually cast your APInt
to the right type.
On the bright side, this might let you get rid of .getLimitedValue()
because there will be no more type mismatches(?)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The code has now re-organized with ASTVisitors. I don't think casts should pose a major challenges anymore, but please let me know if you think otherwise. Also have added more type cast tests to cover all bases.
80e593f
to
7e00765
Compare
|
||
explicit MaxValueEval(ASTContext &Ctx, const Expr *exp) : Context(Ctx) { | ||
bit_width = Ctx.getIntWidth(exp->getType()); | ||
Max = llvm::APInt::getSignedMaxValue(bit_width); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the same bit_width
, the unsigned max value will be different from the signed one, right? Maybe Max
should depend on exp
's type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. Will handle this in the next version.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also types of those bitwise operands matter. For example, ((unsigned long long) -1) & ((unsigned) var)
is of type unsigned long long
but the value is actually upper-bounded by the max value of unsigned
. (https://godbolt.org/z/b1zd1zEje)
Handle this in next version sounds good me.
return false; | ||
} | ||
|
||
bool VisitBinaryOperator(BinaryOperator *E) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
According to the document of RecursiveASTVisitor
:
/// These three method groups are tiered (Traverse* > WalkUpFrom* >
/// Visit*). A method (e.g. Traverse*) may call methods from the same
/// tier (e.g. other Traverse*) or one tier lower (e.g. WalkUpFrom*).
/// It may not call methods from a higher tier.
we should override TraverseBinaryOperator
here, because this function depends on the results of its' two children (i.e., TraverseStmt(E->getLHS())
below).
Looks like Visit*
is for cases where visiting a node is independent of its' children.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well.. This is a bit tricky and we actually don't always depend on the LHS and RHS evaluation. Here, I am first checking if the expr evaluates to a constant value. If yes, we store the result and don't traverse any further. If it does not, only then we process the LHS and RHS to extract their limits.
return false; | ||
} else { | ||
TraverseStmt(E->getLHS()); | ||
llvm::APInt LHS = val.back(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there is no guarantee of val.size() > 0
here.
This expression will break it: (unsigned long)"x" << 1
I also tried s.f << 1
. It doesn't crash because the DeclRefExpr s
gets visited and hence val
is populated. But I guess such behavior is not what you intended, because val
should represent the approximation of s.f
not s
.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think ConstStmtVisitor
is a better option than RecursiveASTVisitor
because the return type of each Visit*
is a template parameter. You can make Visit*
return APInt
, which I think is a more natural way of doing it.
return false; | ||
} | ||
|
||
bool VisitBinaryOperator(BinaryOperator *E) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's first establish the scope of this work.
The current approach seems very ambitious (which is generally great!) but misses some of the tricky details.
For example the same arithmetic expression evaluated in APInt domain (Arbitrary Precision Integer) and e. g. uint32_t
domain gives different results because of overflow/underflow, etc.
I don't think we need to be that ambitious here and now - if we leave some false positives for -Wunsafe-buffer-usage
for my_array[ /* some weird expression */ ]
that's fine and we can improve it later using the data from the adoption as a signal for prioritization.
I also assume correct implementation of the most ambitious version of this functionality would take way more effort and time than is reasonable to invest into this problem at this point.
The conclusion for me is - let's support only certain types of expressions for now and keep the implementation simpler.
Based on what we've seen in the adoption and my intuition I feel that this approximation might be sufficient for now (let's discuss what I've missed though!):
arbitrary_unsigned_E & constant
- max is constant
constant & arbitrary_unsigned_E
- max is constant
arbitrary_unsigned_E % constant
- max is constant
arbitrary_unsigned_E >> constant
- max is something like (this is just buggy pseudo-code) pow( max(bitwidth(arbitrary_E) - constant, 0), 2 )
For this we might also avoid using the recursive visitor and simplify the implementation to just a single function where we traverse the expression manually using getLHS
, getRHS
, etc.
85b4ebc
to
ea92377
Compare
8febd5a
to
f5b5835
Compare
Expr *eExpr = E->getSubExpr(); | ||
clang::IntegerLiteral *intLiteral = clang::IntegerLiteral::Create( | ||
Context, subEValue, eExpr->getType(), {}); | ||
E->setSubExpr(intLiteral); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Calling setSubExpr
changes the AST. I think we should not do this?
llvm::APInt TraverseStmt(Stmt *S) { | ||
if (Expr *E = dyn_cast<Expr>(S)) { | ||
Expr::EvalResult EVResult; | ||
if (ImplicitCastExpr *ICE = dyn_cast<ImplicitCastExpr>(E)) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: we could do a switch
on E->getStmtClass()
here, which is more efficient than a list of if-else.
…tant size array (arr[anything & 0]) Do not warn when a constant sized array is indexed with an expression that contains bitwise and operation involving constants and it always results in a bound safe access. rdar://136684050
f5b5835
to
bf1f0b8
Compare
@dtarditi FYI. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for minimizing the change so that the review becomes much easier.
Consider all my comments nitpicks.
} | ||
return false; | ||
|
||
} else |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: this else
is not needed. (VSCode often warns me about this.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will get rid of it. Thanks!
const Expr *LHS = BE->getLHS(); | ||
const Expr *RHS = BE->getRHS(); | ||
|
||
if ((!LHS->isValueDependent() && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if LHS->isValueDependent()
, no constant value can be derived from LHS
anyway. So I suspect this condition is not needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The Expr::EvaluateAsInt(...) method assumes the expression it operates on in not value dependent. So, we have to check this before hand to make sure the method does not crash. We can only skip this check if we can be guaranteed the expression is never value dependent. I don't think that is true here.
array2[6 & idx & (idx + 1) & 5]; // expected-note{{used in buffer access here}} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
how about some test cases for negative integer constants?
Do not warn when a constant sized array is indexed with an expression that contains bitwise and operation
involving constants and it always results in a bound safe access.
(rdar://136684050)