[lldb/libc++] Simplify the libc++ string formatter

Precise string layout has changed a lot recently, but a long of these
changes did not have any effect on the usages of its fields -- e.g.
introduction/removal of an anonymous struct or union does not change the
way one can access the field in C++. Our name-based variable lookup
rules (deliberately) copy the C++ semantics, which means these changes
would have been invisible to the them, if only we were using name-based
lookup.

This patch replaces the opaque child index accesses with name-based
lookups, which allows us to greatly simplify the data formatter code.
The formatter continues to support all the string layouts that it
previously supported.

It is unclear why the formatter was not using this approach from the
beginning. I would speculate that the original version was working
around some (now fixed) issue with anonymous members or base classes,
and the subsequent revisions stuck with that approach out of inertia.

Differential Revision: https://reviews.llvm.org/D129490
This commit is contained in:
Pavel Labath
2022-07-11 16:56:04 +02:00
parent f44d28f840
commit d4381153ea

View File

@@ -547,101 +547,77 @@ bool lldb_private::formatters::LibcxxContainerSummaryProvider(
}
/// The field layout in a libc++ string (cap, side, data or data, size, cap).
enum LibcxxStringLayoutMode {
eLibcxxStringLayoutModeCSD = 0,
eLibcxxStringLayoutModeDSC = 1,
eLibcxxStringLayoutModeInvalid = 0xffff
};
namespace {
enum class StringLayout { CSD, DSC };
}
/// Determine the size in bytes of \p valobj (a libc++ std::string object) and
/// extract its data payload. Return the size + payload pair.
// TODO: Support big-endian architectures.
static llvm::Optional<std::pair<uint64_t, ValueObjectSP>>
ExtractLibcxxStringInfo(ValueObject &valobj) {
ValueObjectSP dataval_sp(valobj.GetChildAtIndexPath({0, 0, 0, 0}));
if (!dataval_sp)
return {};
if (!dataval_sp->GetError().Success())
ValueObjectSP valobj_r_sp =
valobj.GetChildMemberWithName(ConstString("__r_"), /*can_create=*/true);
if (!valobj_r_sp)
return {};
ValueObjectSP layout_decider(
dataval_sp->GetChildAtIndexPath(llvm::ArrayRef<size_t>({0, 0})));
// this child should exist
if (!layout_decider)
// __r_ is a compressed_pair of the actual data and the allocator. The data we
// want is in the first base class.
ValueObjectSP valobj_r_base_sp =
valobj_r_sp->GetChildAtIndex(0, /*can_create=*/true);
if (!valobj_r_base_sp)
return {};
ConstString g_data_name("__data_");
ConstString g_size_name("__size_");
ValueObjectSP valobj_rep_sp = valobj_r_base_sp->GetChildMemberWithName(
ConstString("__value_"), /*can_create=*/true);
if (!valobj_rep_sp)
return {};
ValueObjectSP l = valobj_rep_sp->GetChildMemberWithName(ConstString("__l"),
/*can_create=*/true);
if (!l)
return {};
StringLayout layout = l->GetIndexOfChildWithName(ConstString("__data_")) == 0
? StringLayout::DSC
: StringLayout::CSD;
bool short_mode = false; // this means the string is in short-mode and the
// data is stored inline
bool using_bitmasks = true; // Whether the class uses bitmasks for the mode
// flag (pre-D123580).
uint64_t size;
LibcxxStringLayoutMode layout = (layout_decider->GetName() == g_data_name)
? eLibcxxStringLayoutModeDSC
: eLibcxxStringLayoutModeCSD;
uint64_t size_mode_value = 0;
ValueObjectSP short_sp(dataval_sp->GetChildAtIndex(1, true));
ValueObjectSP short_sp = valobj_rep_sp->GetChildMemberWithName(
ConstString("__s"), /*can_create=*/true);
if (!short_sp)
return {};
ValueObjectSP short_fields_sp;
ValueObjectSP is_long =
short_sp->GetChildMemberWithName(ConstString("__is_long_"), true);
if (is_long) {
short_fields_sp = short_sp;
} else {
// After D128285, we need to access the `__is_long_` and `__size_` fields
// from a packed anonymous struct
short_fields_sp = short_sp->GetChildAtIndex(0, true);
is_long = short_sp->GetChildMemberWithName(ConstString("__is_long_"), true);
}
ValueObjectSP size_sp =
short_sp->GetChildAtNamePath({ConstString("__size_")});
if (!size_sp)
return {};
if (is_long) {
using_bitmasks = false;
short_mode = !is_long->GetValueAsUnsigned(/*fail_value=*/0);
if (ValueObjectSP size_member =
dataval_sp->GetChildAtNamePath({ConstString("__s"), ConstString("__size_")}))
size = size_member->GetValueAsUnsigned(/*fail_value=*/0);
else
return {};
} else if (layout == eLibcxxStringLayoutModeDSC) {
llvm::SmallVector<size_t, 3> size_mode_locations[] = {
{1, 2}, // Post-c3d0205ee771 layout. This was in use for only a brief
// period, so we can delete it if it becomes a burden.
{1, 1, 0},
{1, 1, 1},
};
ValueObjectSP size_mode;
for (llvm::ArrayRef<size_t> loc : size_mode_locations) {
size_mode = dataval_sp->GetChildAtIndexPath(loc);
if (size_mode && size_mode->GetName() == g_size_name)
break;
}
if (!size_mode)
return {};
size_mode_value = (size_mode->GetValueAsUnsigned(0));
short_mode = ((size_mode_value & 0x80) == 0);
size = size_sp->GetValueAsUnsigned(/*fail_value=*/0);
} else {
ValueObjectSP size_mode(dataval_sp->GetChildAtIndexPath({1, 0, 0}));
if (!size_mode)
return {};
size_mode_value = (size_mode->GetValueAsUnsigned(0));
short_mode = ((size_mode_value & 1) == 0);
// The string mode is encoded in the size field.
size_mode_value = size_sp->GetValueAsUnsigned(0);
uint8_t mode_mask = layout == StringLayout::DSC ? 0x80 : 1;
short_mode = (size_mode_value & mode_mask) == 0;
}
if (short_mode) {
ValueObjectSP location_sp =
short_sp->GetChildMemberWithName(g_data_name, true);
short_sp->GetChildMemberWithName(ConstString("__data_"), true);
if (using_bitmasks)
size = (layout == eLibcxxStringLayoutModeDSC)
? size_mode_value
: ((size_mode_value >> 1) % 256);
size = (layout == StringLayout::DSC) ? size_mode_value
: ((size_mode_value >> 1) % 256);
// When the small-string optimization takes place, the data must fit in the
// inline string buffer (23 bytes on x86_64/Darwin). If it doesn't, it's
@@ -656,10 +632,6 @@ ExtractLibcxxStringInfo(ValueObject &valobj) {
return std::make_pair(size, location_sp);
}
ValueObjectSP l(dataval_sp->GetChildAtIndex(0, true));
if (!l)
return {};
// we can use the layout_decider object as the data pointer
ValueObjectSP location_sp =
l->GetChildMemberWithName(ConstString("__data_"), /*can_create=*/true);
@@ -667,19 +639,11 @@ ExtractLibcxxStringInfo(ValueObject &valobj) {
l->GetChildMemberWithName(ConstString("__size_"), /*can_create=*/true);
ValueObjectSP capacity_vo =
l->GetChildMemberWithName(ConstString("__cap_"), /*can_create=*/true);
if (!capacity_vo) {
// After D128285, we need to access the `__cap_` field from a packed
// anonymous struct
if (ValueObjectSP packed_fields_sp = l->GetChildAtIndex(0, true)) {
ValueObjectSP capacity_vo = packed_fields_sp->GetChildMemberWithName(
ConstString("__cap_"), /*can_create=*/true);
}
}
if (!size_vo || !location_sp || !capacity_vo)
return {};
size = size_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
uint64_t capacity = capacity_vo->GetValueAsUnsigned(LLDB_INVALID_OFFSET);
if (!using_bitmasks && layout == eLibcxxStringLayoutModeCSD)
if (!using_bitmasks && layout == StringLayout::CSD)
capacity *= 2;
if (size == LLDB_INVALID_OFFSET || capacity == LLDB_INVALID_OFFSET ||
capacity < size)