Skip to content

Commit 0844f83

Browse files
authored
[clang][analyzer] Stable order for SymbolRef-keyed containers (llvm#121551)
Generalize the `SymbolID`s used for `SymbolData` to all `SymExpr`s and use these IDs for comparison `SymbolRef` keys in various containers, such as `ConstraintMap`. These IDs are superior to raw pointer values because they are more controllable and are not randomized across executions (unlike [pointers](https://en.wikipedia.org/wiki/Address_space_layout_randomization)). These IDs order is stable across runs because SymExprs are allocated in the same order. Stability of the constraint order is important for the stability of the analyzer results. I evaluated this change on a set of 200+ open-source C and C++ projects with the total number of ~78 000 symbolic-execution issues passing Z3 refutation. This patch reduced the run-to-run churn (flakiness) in SE issues from 80-90 to 30-40 (out of 78K) in our CSA deployment (in our setting flaky issues are mostly due to Z3 refutation instability). Note, most of the issue churn (flakiness) is caused by the mentioned Z3 refutation. With Z3 refutation disabled, issue churn goes down to ~10 issues out of 83K and this patch has no effect on appearing/disappearing issues between runs. It however, seems to reduce the volatility of the execution flow: before we had 40-80 issues with changed execution flow, after - 10-30. Importantly, this change is necessary for the next step in stabilizing analysis results by caching Z3 query outcomes between analysis runs (work in progress). Across our admittedly noisy CI runs, I detected no significant effect on memory footprint or analysis time. CPP-5919
1 parent 39a9073 commit 0844f83

11 files changed

+149
-85
lines changed

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h

+22-9
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ namespace ento {
2525

2626
class MemRegion;
2727

28+
using SymbolID = unsigned;
29+
2830
/// Symbolic value. These values used to capture symbolic execution of
2931
/// the program.
3032
class SymExpr : public llvm::FoldingSetNode {
@@ -39,9 +41,19 @@ class SymExpr : public llvm::FoldingSetNode {
3941

4042
private:
4143
Kind K;
44+
/// A unique identifier for this symbol.
45+
///
46+
/// It is useful for SymbolData to easily differentiate multiple symbols, but
47+
/// also for "ephemeral" symbols, such as binary operations, because this id
48+
/// can be used for arranging constraints or equivalence classes instead of
49+
/// unstable pointer values.
50+
///
51+
/// Note, however, that it can't be used in Profile because SymbolManager
52+
/// needs to compute Profile before allocating SymExpr.
53+
const SymbolID Sym;
4254

4355
protected:
44-
SymExpr(Kind k) : K(k) {}
56+
SymExpr(Kind k, SymbolID Sym) : K(k), Sym(Sym) {}
4557

4658
static bool isValidTypeForSymbol(QualType T) {
4759
// FIXME: Depending on whether we choose to deprecate structural symbols,
@@ -56,6 +68,14 @@ class SymExpr : public llvm::FoldingSetNode {
5668

5769
Kind getKind() const { return K; }
5870

71+
/// Get a unique identifier for this symbol.
72+
/// The ID is unique across all SymExprs in a SymbolManager.
73+
/// They reflect the allocation order of these SymExprs,
74+
/// and are likely stable across runs.
75+
/// Used as a key in SymbolRef containers and as part of identity
76+
/// for SymbolData, e.g. SymbolConjured with ID = 7 is "conj_$7".
77+
SymbolID getSymbolID() const { return Sym; }
78+
5979
virtual void dump() const;
6080

6181
virtual void dumpToStream(raw_ostream &os) const {}
@@ -112,28 +132,21 @@ inline raw_ostream &operator<<(raw_ostream &os,
112132

113133
using SymbolRef = const SymExpr *;
114134
using SymbolRefSmallVectorTy = SmallVector<SymbolRef, 2>;
115-
using SymbolID = unsigned;
116135

117136
/// A symbol representing data which can be stored in a memory location
118137
/// (region).
119138
class SymbolData : public SymExpr {
120-
const SymbolID Sym;
121-
122139
void anchor() override;
123140

124141
protected:
125-
SymbolData(Kind k, SymbolID sym) : SymExpr(k), Sym(sym) {
126-
assert(classof(this));
127-
}
142+
SymbolData(Kind k, SymbolID sym) : SymExpr(k, sym) { assert(classof(this)); }
128143

129144
public:
130145
~SymbolData() override = default;
131146

132147
/// Get a string representation of the kind of the region.
133148
virtual StringRef getKindStr() const = 0;
134149

135-
SymbolID getSymbolID() const { return Sym; }
136-
137150
unsigned computeComplexity() const override {
138151
return 1;
139152
};

clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h

+78-22
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
#include "llvm/ADT/DenseMap.h"
2626
#include "llvm/ADT/DenseSet.h"
2727
#include "llvm/ADT/FoldingSet.h"
28+
#include "llvm/ADT/ImmutableSet.h"
2829
#include "llvm/ADT/iterator_range.h"
2930
#include "llvm/Support/Allocator.h"
3031
#include <cassert>
@@ -43,15 +44,16 @@ class StoreManager;
4344
class SymbolRegionValue : public SymbolData {
4445
const TypedValueRegion *R;
4546

46-
public:
47+
friend class SymExprAllocator;
4748
SymbolRegionValue(SymbolID sym, const TypedValueRegion *r)
4849
: SymbolData(SymbolRegionValueKind, sym), R(r) {
4950
assert(r);
5051
assert(isValidTypeForSymbol(r->getValueType()));
5152
}
5253

54+
public:
5355
LLVM_ATTRIBUTE_RETURNS_NONNULL
54-
const TypedValueRegion* getRegion() const { return R; }
56+
const TypedValueRegion *getRegion() const { return R; }
5557

5658
static void Profile(llvm::FoldingSetNodeID& profile, const TypedValueRegion* R) {
5759
profile.AddInteger((unsigned) SymbolRegionValueKind);
@@ -84,7 +86,7 @@ class SymbolConjured : public SymbolData {
8486
const LocationContext *LCtx;
8587
const void *SymbolTag;
8688

87-
public:
89+
friend class SymExprAllocator;
8890
SymbolConjured(SymbolID sym, const Stmt *s, const LocationContext *lctx,
8991
QualType t, unsigned count, const void *symbolTag)
9092
: SymbolData(SymbolConjuredKind, sym), S(s), T(t), Count(count),
@@ -98,6 +100,7 @@ class SymbolConjured : public SymbolData {
98100
assert(isValidTypeForSymbol(t));
99101
}
100102

103+
public:
101104
/// It might return null.
102105
const Stmt *getStmt() const { return S; }
103106
unsigned getCount() const { return Count; }
@@ -137,14 +140,15 @@ class SymbolDerived : public SymbolData {
137140
SymbolRef parentSymbol;
138141
const TypedValueRegion *R;
139142

140-
public:
143+
friend class SymExprAllocator;
141144
SymbolDerived(SymbolID sym, SymbolRef parent, const TypedValueRegion *r)
142145
: SymbolData(SymbolDerivedKind, sym), parentSymbol(parent), R(r) {
143146
assert(parent);
144147
assert(r);
145148
assert(isValidTypeForSymbol(r->getValueType()));
146149
}
147150

151+
public:
148152
LLVM_ATTRIBUTE_RETURNS_NONNULL
149153
SymbolRef getParentSymbol() const { return parentSymbol; }
150154
LLVM_ATTRIBUTE_RETURNS_NONNULL
@@ -180,12 +184,13 @@ class SymbolDerived : public SymbolData {
180184
class SymbolExtent : public SymbolData {
181185
const SubRegion *R;
182186

183-
public:
187+
friend class SymExprAllocator;
184188
SymbolExtent(SymbolID sym, const SubRegion *r)
185189
: SymbolData(SymbolExtentKind, sym), R(r) {
186190
assert(r);
187191
}
188192

193+
public:
189194
LLVM_ATTRIBUTE_RETURNS_NONNULL
190195
const SubRegion *getRegion() const { return R; }
191196

@@ -222,7 +227,7 @@ class SymbolMetadata : public SymbolData {
222227
unsigned Count;
223228
const void *Tag;
224229

225-
public:
230+
friend class SymExprAllocator;
226231
SymbolMetadata(SymbolID sym, const MemRegion* r, const Stmt *s, QualType t,
227232
const LocationContext *LCtx, unsigned count, const void *tag)
228233
: SymbolData(SymbolMetadataKind, sym), R(r), S(s), T(t), LCtx(LCtx),
@@ -234,6 +239,7 @@ class SymbolMetadata : public SymbolData {
234239
assert(tag);
235240
}
236241

242+
public:
237243
LLVM_ATTRIBUTE_RETURNS_NONNULL
238244
const MemRegion *getRegion() const { return R; }
239245

@@ -286,15 +292,16 @@ class SymbolCast : public SymExpr {
286292
/// The type of the result.
287293
QualType ToTy;
288294

289-
public:
290-
SymbolCast(const SymExpr *In, QualType From, QualType To)
291-
: SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) {
295+
friend class SymExprAllocator;
296+
SymbolCast(SymbolID Sym, const SymExpr *In, QualType From, QualType To)
297+
: SymExpr(SymbolCastKind, Sym), Operand(In), FromTy(From), ToTy(To) {
292298
assert(In);
293299
assert(isValidTypeForSymbol(From));
294300
// FIXME: GenericTaintChecker creates symbols of void type.
295301
// Otherwise, 'To' should also be a valid type.
296302
}
297303

304+
public:
298305
unsigned computeComplexity() const override {
299306
if (Complexity == 0)
300307
Complexity = 1 + Operand->computeComplexity();
@@ -332,9 +339,10 @@ class UnarySymExpr : public SymExpr {
332339
UnaryOperator::Opcode Op;
333340
QualType T;
334341

335-
public:
336-
UnarySymExpr(const SymExpr *In, UnaryOperator::Opcode Op, QualType T)
337-
: SymExpr(UnarySymExprKind), Operand(In), Op(Op), T(T) {
342+
friend class SymExprAllocator;
343+
UnarySymExpr(SymbolID Sym, const SymExpr *In, UnaryOperator::Opcode Op,
344+
QualType T)
345+
: SymExpr(UnarySymExprKind, Sym), Operand(In), Op(Op), T(T) {
338346
// Note, some unary operators are modeled as a binary operator. E.g. ++x is
339347
// modeled as x + 1.
340348
assert((Op == UO_Minus || Op == UO_Not) && "non-supported unary expression");
@@ -345,6 +353,7 @@ class UnarySymExpr : public SymExpr {
345353
assert(!Loc::isLocType(T) && "unary symbol should be nonloc");
346354
}
347355

356+
public:
348357
unsigned computeComplexity() const override {
349358
if (Complexity == 0)
350359
Complexity = 1 + Operand->computeComplexity();
@@ -381,8 +390,8 @@ class BinarySymExpr : public SymExpr {
381390
QualType T;
382391

383392
protected:
384-
BinarySymExpr(Kind k, BinaryOperator::Opcode op, QualType t)
385-
: SymExpr(k), Op(op), T(t) {
393+
BinarySymExpr(SymbolID Sym, Kind k, BinaryOperator::Opcode op, QualType t)
394+
: SymExpr(k, Sym), Op(op), T(t) {
386395
assert(classof(this));
387396
// Binary expressions are results of arithmetic. Pointer arithmetic is not
388397
// handled by binary expressions, but it is instead handled by applying
@@ -425,14 +434,15 @@ class BinarySymExprImpl : public BinarySymExpr {
425434
LHSTYPE LHS;
426435
RHSTYPE RHS;
427436

428-
public:
429-
BinarySymExprImpl(LHSTYPE lhs, BinaryOperator::Opcode op, RHSTYPE rhs,
430-
QualType t)
431-
: BinarySymExpr(ClassKind, op, t), LHS(lhs), RHS(rhs) {
437+
friend class SymExprAllocator;
438+
BinarySymExprImpl(SymbolID Sym, LHSTYPE lhs, BinaryOperator::Opcode op,
439+
RHSTYPE rhs, QualType t)
440+
: BinarySymExpr(Sym, ClassKind, op, t), LHS(lhs), RHS(rhs) {
432441
assert(getPointer(lhs));
433442
assert(getPointer(rhs));
434443
}
435444

445+
public:
436446
void dumpToStream(raw_ostream &os) const override {
437447
dumpToStreamImpl(os, LHS);
438448
dumpToStreamImpl(os, getOpcode());
@@ -478,6 +488,21 @@ using IntSymExpr = BinarySymExprImpl<APSIntPtr, const SymExpr *,
478488
using SymSymExpr = BinarySymExprImpl<const SymExpr *, const SymExpr *,
479489
SymExpr::Kind::SymSymExprKind>;
480490

491+
class SymExprAllocator {
492+
SymbolID NextSymbolID = 0;
493+
llvm::BumpPtrAllocator &Alloc;
494+
495+
public:
496+
explicit SymExprAllocator(llvm::BumpPtrAllocator &Alloc) : Alloc(Alloc) {}
497+
498+
template <class SymT, typename... ArgsT> SymT *make(ArgsT &&...Args) {
499+
return new (Alloc) SymT(nextID(), std::forward<ArgsT>(Args)...);
500+
}
501+
502+
private:
503+
SymbolID nextID() { return NextSymbolID++; }
504+
};
505+
481506
class SymbolManager {
482507
using DataSetTy = llvm::FoldingSet<SymExpr>;
483508
using SymbolDependTy =
@@ -489,15 +514,14 @@ class SymbolManager {
489514
/// alive as long as the key is live.
490515
SymbolDependTy SymbolDependencies;
491516

492-
unsigned SymbolCounter = 0;
493-
llvm::BumpPtrAllocator& BPAlloc;
517+
SymExprAllocator Alloc;
494518
BasicValueFactory &BV;
495519
ASTContext &Ctx;
496520

497521
public:
498522
SymbolManager(ASTContext &ctx, BasicValueFactory &bv,
499-
llvm::BumpPtrAllocator& bpalloc)
500-
: SymbolDependencies(16), BPAlloc(bpalloc), BV(bv), Ctx(ctx) {}
523+
llvm::BumpPtrAllocator &bpalloc)
524+
: SymbolDependencies(16), Alloc(bpalloc), BV(bv), Ctx(ctx) {}
501525

502526
static bool canSymbolicate(QualType T);
503527

@@ -687,4 +711,36 @@ class SymbolVisitor {
687711

688712
} // namespace clang
689713

714+
// Override the default definition that would use pointer values of SymbolRefs
715+
// to order them, which is unstable due to ASLR.
716+
// Use the SymbolID instead which reflect the order in which the symbols were
717+
// allocated. This is usually stable across runs leading to the stability of
718+
// ConstraintMap and other containers using SymbolRef as keys.
719+
template <>
720+
struct ::llvm::ImutContainerInfo<clang::ento::SymbolRef>
721+
: public ImutProfileInfo<clang::ento::SymbolRef> {
722+
using value_type = clang::ento::SymbolRef;
723+
using value_type_ref = clang::ento::SymbolRef;
724+
using key_type = value_type;
725+
using key_type_ref = value_type_ref;
726+
using data_type = bool;
727+
using data_type_ref = bool;
728+
729+
static key_type_ref KeyOfValue(value_type_ref D) { return D; }
730+
static data_type_ref DataOfValue(value_type_ref) { return true; }
731+
732+
static bool isEqual(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
733+
return LHS->getSymbolID() == RHS->getSymbolID();
734+
}
735+
736+
static bool isLess(clang::ento::SymbolRef LHS, clang::ento::SymbolRef RHS) {
737+
return LHS->getSymbolID() < RHS->getSymbolID();
738+
}
739+
740+
// This might seem redundant, but it is required because of the way
741+
// ImmutableSet is implemented through AVLTree:
742+
// same as ImmutableMap, but with a non-informative "data".
743+
static bool isDataEqual(data_type_ref, data_type_ref) { return true; }
744+
};
745+
690746
#endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SYMBOLMANAGER_H

0 commit comments

Comments
 (0)