[analyzer] Revert incorrect LazyCoumpoundVal changes (#163461)

Reverts #115917 and its follow up #116840.
Fixes #153782 and introduces regression tests.
Reopens #114270.
This commit is contained in:
Marco Borgeaud
2025-11-04 17:39:23 +01:00
committed by GitHub
parent 78769d51c6
commit cc3ad201ec
10 changed files with 130 additions and 92 deletions

View File

@@ -714,11 +714,6 @@ public: // Part of public interface to class.
return getBinding(getRegionBindings(S), L, T);
}
std::optional<SVal> getUniqueDefaultBinding(RegionBindingsConstRef B,
const TypedValueRegion *R) const;
std::optional<SVal>
getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const;
std::optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
// Default bindings are always applied over a base region so look up the
@@ -2465,11 +2460,6 @@ SVal RegionStoreManager::getBindingForStruct(RegionBindingsConstRef B,
// behavior doesn't depend on the struct layout.
// This way even an empty struct can carry taint, no matter if creduce drops
// the last field member or not.
// Try to avoid creating a LCV if it would anyways just refer to a single
// default binding.
if (std::optional<SVal> Val = getUniqueDefaultBinding(B, R))
return *Val;
return createLazyBinding(B, R);
}
@@ -2757,50 +2747,12 @@ RegionStoreManager::bindVector(LimitedRegionBindingsConstRef B,
return NewB;
}
std::optional<SVal>
RegionStoreManager::getUniqueDefaultBinding(RegionBindingsConstRef B,
const TypedValueRegion *R) const {
if (R != R->getBaseRegion())
return std::nullopt;
const auto *Cluster = B.lookup(R);
if (!Cluster || !llvm::hasSingleElement(*Cluster))
return std::nullopt;
const auto [Key, Value] = *Cluster->begin();
return Key.isDirect() ? std::optional<SVal>{} : Value;
}
std::optional<SVal>
RegionStoreManager::getUniqueDefaultBinding(nonloc::LazyCompoundVal LCV) const {
auto B = getRegionBindings(LCV.getStore());
return getUniqueDefaultBinding(B, LCV.getRegion());
}
std::optional<LimitedRegionBindingsRef> RegionStoreManager::tryBindSmallStruct(
LimitedRegionBindingsConstRef B, const TypedValueRegion *R,
const RecordDecl *RD, nonloc::LazyCompoundVal LCV) {
if (B.hasExhaustedBindingLimit())
return B.withValuesEscaped(LCV);
// If we try to copy a Conjured value representing the value of the whole
// struct, don't try to element-wise copy each field.
// That would unnecessarily bind Derived symbols slicing off the subregion for
// the field from the whole Conjured symbol.
//
// struct Window { int width; int height; };
// Window getWindow(); <-- opaque fn.
// Window w = getWindow(); <-- conjures a new Window.
// Window w2 = w; <-- trivial copy "w", calling "tryBindSmallStruct"
//
// We should not end up with a new Store for "w2" like this:
// Direct [ 0..31]: Derived{Conj{}, w.width}
// Direct [32..63]: Derived{Conj{}, w.height}
// Instead, we should just bind that Conjured value instead.
if (std::optional<SVal> Val = getUniqueDefaultBinding(LCV)) {
return B.addBinding(BindingKey::Make(R, BindingKey::Default), Val.value());
}
FieldVector Fields;
if (const CXXRecordDecl *Class = dyn_cast<CXXRecordDecl>(RD))

View File

@@ -3,13 +3,13 @@
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDelete
//
// RUN: %clang_analyze_cc1 -DLEAKS -std=c++11 -fblocks %s \
// RUN: %clang_analyze_cc1 -std=c++11 -fblocks %s \
// RUN: -verify=expected,newdelete,leak \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDelete \
// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks
//
// RUN: %clang_analyze_cc1 -std=c++11 -fblocks -verify %s \
// RUN: %clang_analyze_cc1 -std=c++11 -fblocks %s \
// RUN: -verify=expected,leak \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks
@@ -19,13 +19,13 @@
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDelete
//
// RUN: %clang_analyze_cc1 -DLEAKS -std=c++17 -fblocks %s \
// RUN: %clang_analyze_cc1 -std=c++17 -fblocks %s \
// RUN: -verify=expected,newdelete,leak \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDelete \
// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks
//
// RUN: %clang_analyze_cc1 -std=c++17 -fblocks -verify %s \
// RUN: %clang_analyze_cc1 -std=c++17 -fblocks %s \
// RUN: -verify=expected,leak,inspection \
// RUN: -analyzer-checker=core \
// RUN: -analyzer-checker=cplusplus.NewDeleteLeaks \
@@ -503,3 +503,75 @@ namespace optional_union {
custom_union_t a;
} // leak-warning{{Potential leak of memory pointed to by 'a.present.q'}}
}
namespace gh153782 {
// Ensure we do not regress on the following use case.
namespace mutually_exclusive_test_case_1 {
struct StorageWrapper {
// Imagine the destructor and copy constructor both call a reset() function (among other things).
~StorageWrapper() { delete parts; }
StorageWrapper(StorageWrapper const&) = default;
// Mind that there is no `parts = other.parts` assignment -- this is the bug we would like to find.
void operator=(StorageWrapper&& other) { delete parts; } // newdelete-warning{{Attempt to release already released memory}}
// Not provided, typically would do `parts = new long`.
StorageWrapper();
long* parts;
};
void test_non_trivial_struct_assignment() {
StorageWrapper* object = new StorageWrapper[]{StorageWrapper()};
object[0] = StorageWrapper(); // This assignment leads to the double-free.
}
} // mutually_exclusive_test_case_1
namespace mutually_exclusive_test_case_2 {
struct StorageWrapper {
// Imagine the destructor and copy constructor both call a reset() function (among other things).
~StorageWrapper() { delete parts; }
StorageWrapper(StorageWrapper const&) = default;
// Mind that there is no `parts = other.parts` assignment -- this is the bug we would like to find.
void operator=(StorageWrapper&& other) { delete parts; }
// Not provided, typically would do `parts = new long`.
StorageWrapper();
long* parts;
};
void test_non_trivial_struct_assignment() {
StorageWrapper* object = new StorageWrapper[]{StorageWrapper()};
// object[0] = StorageWrapper(); // Remove the source of double free to make the potential leak appear.
} // leak-warning{{Potential leak of memory pointed to by 'object'}}
} // mutually_exclusive_test_case_2
namespace mutually_exclusive_test_case_3 {
struct StorageWrapper {
// Imagine the destructor and copy constructor both call a reset() function (among other things).
~StorageWrapper() { delete parts; }
StorageWrapper(StorageWrapper const&) = default;
// Mind that there is no `parts = other.parts` assignment -- this is the bug we would like to find.
void operator=(StorageWrapper&& other) { delete parts; } // newdelete-warning{{Attempt to release already released memory}}
// Not provided, typically would do `parts = new long`.
StorageWrapper();
long* parts;
};
struct TestDoubleFreeWithInitializerList {
StorageWrapper* Object;
TestDoubleFreeWithInitializerList()
: Object(new StorageWrapper[]{StorageWrapper()}) {
Object[0] = StorageWrapper(); // This assignment leads to the double-free.
}
};
} // mutually_exclusive_test_case_3
} // namespace gh153782

View File

@@ -5,8 +5,6 @@
void clang_analyzer_printState();
template <typename T> void clang_analyzer_dump_lref(T& param);
template <typename T> void clang_analyzer_dump_val(T param);
template <typename T> void clang_analyzer_denote(T param, const char *name);
template <typename T> void clang_analyzer_express(T param);
template <typename T> T conjure();
template <typename... Ts> void nop(const Ts &... args) {}
@@ -42,10 +40,16 @@ void test_assign_return() {
namespace trivial_struct_copy {
void _01_empty_structs() {
clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{conj_$}}
clang_analyzer_dump_val(conjure<empty>()); // expected-warning {{lazyCompoundVal}}
empty Empty = conjure<empty>();
empty Empty2 = Empty;
empty Empty3 = Empty2;
// All of these should refer to the exact same LCV, because all of
// these trivial copies refer to the original conjured value.
// There were Unknown before:
clang_analyzer_dump_val(Empty); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Empty2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Empty3); // expected-warning {{lazyCompoundVal}}
// We only have binding for the original Empty object, because copying empty
// objects is a no-op in the performTrivialCopy. This is fine, because empty
@@ -67,20 +71,18 @@ void _01_empty_structs() {
}
void _02_structs_with_members() {
clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{conj_$}}
clang_analyzer_dump_val(conjure<aggr>()); // expected-warning {{lazyCompoundVal}}
aggr Aggr = conjure<aggr>();
aggr Aggr2 = Aggr;
aggr Aggr3 = Aggr2;
// All of these should refer to the exact same symbol, because all of
// All of these should refer to the exact same LCV, because all of
// these trivial copies refer to the original conjured value.
clang_analyzer_denote(Aggr, "$Aggr");
clang_analyzer_express(Aggr); // expected-warning {{$Aggr}}
clang_analyzer_express(Aggr2); // expected-warning {{$Aggr}}
clang_analyzer_express(Aggr3); // expected-warning {{$Aggr}}
clang_analyzer_dump_val(Aggr); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Aggr2); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump_val(Aggr3); // expected-warning {{lazyCompoundVal}}
// We should have the same Conjured symbol for "Aggr", "Aggr2" and "Aggr3".
// We used to have Derived symbols for the individual fields that were
// copied as part of copying the whole struct.
// We have fields in the struct we copy, thus we also have the entries for the copies
// (and for all of their fields).
clang_analyzer_printState();
// CHECK: "store": { "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "cluster": "GlobalInternalSpaceRegion", "pointer": "0x{{[0-9a-f]+}}", "items": [
@@ -93,10 +95,12 @@ void _02_structs_with_members() {
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ:conj_\$[0-9]+{int, LC[0-9]+, S[0-9]+, #[0-9]+}]]" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr2", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
// CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "Aggr3", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "[[AGGR_CONJ]]" }
// CHECK-NEXT: { "kind": "Direct", "offset": 0, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.x}" },
// CHECK-NEXT: { "kind": "Direct", "offset": 32, "value": "derived_${{[0-9]+}}{[[AGGR_CONJ]],Aggr.y}" }
// CHECK-NEXT: ]}
// CHECK-NEXT: ]},
@@ -113,3 +117,31 @@ void entrypoint() {
}
} // namespace trivial_struct_copy
namespace gh153782 {
// Ensure we do not regress on the following use cases.
// The assumption made on a field in `setPtr` should apply to the returned copy in `func`.
struct Status { int error; };
Status getError();
Status setPtr(int **outptr, int* ptr) {
Status e = getError();
if (e.error != 0) return e; // When assuming the error field is non-zero,
*outptr = ptr; // this is not executed
return e;
}
int func() {
int *ptr = nullptr;
int x = 42;
if (setPtr(&ptr, &x).error == 0) {
// The assumption made in get() SHOULD match the assumption about
// the returned value, hence the engine SHOULD NOT assume ptr is null.
clang_analyzer_dump_val(ptr); // expected-warning {{&x}}
return *ptr;
}
return 0;
}
} // namespace gh153782

View File

@@ -99,7 +99,7 @@ public:
} // end of anonymous namespace
void test_6() {
clang_analyzer_explain(conjure_S()); // expected-warning-re{{{{^symbol of type 'int' conjured at CFG element 'conjure_S\(\) \(CXXRecordTypedCall, \+0\)'$}}}}
clang_analyzer_explain(conjure_S()); // expected-warning-re{{{{^lazily frozen compound value of 1st parameter of function 'clang_analyzer_explain\(\)'$}}}}
clang_analyzer_explain(conjure_S().z); // expected-warning-re{{{{^value derived from \(symbol of type 'int' conjured at CFG element 'conjure_S\(\) \(CXXRecordTypedCall, \)'\) for field 'z' of temporary object constructed at statement 'conjure_S\(\)'$}}}}
}

View File

@@ -2035,7 +2035,6 @@ void print_state(std::vector<int> &V) {
// CHECK: "checker_messages": [
// CHECK: { "checker": "alpha.cplusplus.IteratorModeling", "messages": [
// CHECK-NEXT: "Iterator Positions :",
// CHECK-NEXT: "conj_$[[#]]{int, LC[[#]], S[[#]], #[[#]]} : Valid ; Container == SymRegion{reg_$[[#]]<std::vector<int> & V>} ; Offset == conj_$[[#]]{long, LC[[#]], S[[#]], #[[#]]}",
// CHECK-NEXT: "i0 : Valid ; Container == SymRegion{reg_$[[#]]<std::vector<int> & V>} ; Offset == conj_$[[#]]{long, LC[[#]], S[[#]], #[[#]]}"
// CHECK-NEXT: ]}
@@ -2046,7 +2045,6 @@ void print_state(std::vector<int> &V) {
// CHECK: "checker_messages": [
// CHECK: { "checker": "alpha.cplusplus.IteratorModeling", "messages": [
// CHECK-NEXT: "Iterator Positions :",
// CHECK-NEXT: "conj_$[[#]]{int, LC[[#]], S[[#]], #[[#]]} : Valid ; Container == SymRegion{reg_$[[#]]<std::vector<int> & V>} ; Offset == conj_$[[#]]{long, LC[[#]], S[[#]], #[[#]]}",
// CHECK-NEXT: "i1 : Valid ; Container == SymRegion{reg_$[[#]]<std::vector<int> & V>} ; Offset == conj_$[[#]]{long, LC[[#]], S[[#]], #[[#]]}"
// CHECK-NEXT: ]}

View File

@@ -4,16 +4,6 @@
// RUN: -analyzer-config alpha.cplusplus.STLAlgorithmModeling:AggressiveStdFindModeling=true\
// RUN: -verify
// STLAlgorithmModeling and DebugIteratorModeling are probably bugged because
// these tests started failing after we just directly copy the symbol
// representing the value of a variable instead of creating a LazyCompoundVal
// of that single conjured value.
// In theory, it shouldn't matter if we eagerly copy the value that we would
// "load" from the LCV once requested or just directly binding the backing symbol.
// Yet, these tests fail, so there is likely messed up how/what the checker
// metadata is associated with.
// XFAIL: *
#include "Inputs/system-header-simulator-cxx.h"
void clang_analyzer_eval(bool);

View File

@@ -3,16 +3,6 @@
// RUN: -analyzer-config aggressive-binary-operation-simplification=true\
// RUN: -verify
// STLAlgorithmModeling and DebugIteratorModeling are probably bugged because
// these tests started failing after we just directly copy the symbol
// representing the value of a variable instead of creating a LazyCompoundVal
// of that single conjured value.
// In theory, it shouldn't matter if we eagerly copy the value that we would
// "load" from the LCV once requested or just directly binding the backing symbol.
// Yet, these tests fail, so there is likely messed up how/what the checker
// metadata is associated with.
// XFAIL: *
#include "Inputs/system-header-simulator-cxx.h"
void clang_analyzer_eval(bool);

View File

@@ -41,7 +41,7 @@ void test_output(int n) {
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: ]},
// CHECK-NEXT: { "cluster": "objfirst", "pointer": "0x{{[0-9a-f]+}}", "items": [
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "conj_$
// CHECK-NEXT: { "kind": "Default", "offset": 0, "value": "lazyCompoundVal
// CHECK-NEXT: { "kind": "Direct", "offset": 320, "value": "1 S32b" },
// CHECK-NEXT: { "kind": "Direct", "offset": 352, "value": "2 S32b" },
// CHECK-NEXT: { "kind": "Direct", "offset": 384, "value": "3 S32b" }

View File

@@ -158,7 +158,11 @@ void top() {
clang_analyzer_isTainted(E); // expected-warning {{NO}}
Aggr A = mySource1<Aggr>();
clang_analyzer_isTainted(A); // expected-warning {{YES}}
// FIXME Ideally, both A and A.data should be tainted. However, the
// implementation used by e5ac9145ba29 ([analyzer][taint] Recognize
// tainted LazyCompoundVals (4/4) (#115919), 2024-11-15) led to FPs and
// FNs in various scenarios and had to be reverted to fix #153782.
clang_analyzer_isTainted(A); // expected-warning {{NO}}
clang_analyzer_isTainted(A.data); // expected-warning {{YES}}
}
} // namespace gh114270

View File

@@ -11,7 +11,7 @@ bool operator ==(Box lhs, Box rhs) {
return lhs.value == rhs.value;
}
template <Box V> void dumps() {
clang_analyzer_dump(V); // expected-warning {{Unknown}}
clang_analyzer_dump(V); // expected-warning {{lazyCompoundVal}}
clang_analyzer_dump(&V); // expected-warning {{Unknown}}
clang_analyzer_dump(V.value); // expected-warning {{Unknown}} FIXME: It should be '6 S32b'.
clang_analyzer_dump(&V.value); // expected-warning {{Unknown}}