Reduce llvm-gsymutil memory usage (#139907)

For large binaries gsymutil ends up using too much memory. This diff
adds DIE tree cleanup per compile unit to reduce memory usage.

P.S. I am not sure about the formatting. Maybe clang-format hasn't been run
on these files in a while, or I have misconfigured something.

```
$ git clang-format HEAD~1
clang-format did not modify any files
$ clang-format --version
clang-format version 21.0.0git (git@github.com:peremyach/llvm-project.git 8d945c8357e1bd9872a34f92620d4916bfd27482)
```

Co-authored-by: Arslan Khabutdinov <akhabutdinov@fb.com>
This commit is contained in:
peremyach
2025-05-16 17:44:17 +01:00
committed by GitHub
parent 52af23f950
commit ebb15353d2
5 changed files with 137 additions and 109 deletions

View File

@@ -103,6 +103,7 @@ public:
std::unique_ptr<DWARFDebugMacro>
parseMacroOrMacinfo(MacroSecType SectionType);
virtual Error doWorkThreadSafely(function_ref<Error()> Work) = 0;
};
friend class DWARFContextState;
@@ -491,6 +492,10 @@ public:
/// manually only for DWARF5.
void setParseCUTUIndexManually(bool PCUTU) { ParseCUTUIndexManually = PCUTU; }
/// Runs \p Work through the context state's doWorkThreadSafely() and returns
/// the Error that call produces.
Error doWorkThreadSafely(function_ref<Error()> Work) {
return State->doWorkThreadSafely(Work);
}
private:
void addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram, DWARFDie Die,
std::vector<DILocal> &Result);

View File

@@ -566,6 +566,9 @@ public:
Error tryExtractDIEsIfNeeded(bool CUDieOnly);
/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie, bool KeepDWODies = false);
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const {
@@ -581,9 +584,6 @@ private:
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
std::vector<DWARFDebugInfoEntry> &DIEs) const;
/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie);
/// parseDWO - Parses .dwo file for current compile unit. Returns true if
/// it was actually constructed.
/// The \p AlternativeLocation specifies an alternative location to get

View File

@@ -622,7 +622,9 @@ public:
return getNormalTypeUnitMap();
}
/// Invokes \p Work directly, without taking any lock, and forwards the Error
/// it returns.
Error doWorkThreadSafely(function_ref<Error()> Work) override {
return Work();
}
};
class ThreadSafeState : public ThreadUnsafeDWARFContextState {
@@ -738,6 +740,11 @@ public:
std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
return ThreadUnsafeDWARFContextState::getTypeUnitMap(IsDWO);
}
/// Acquires the recursive Mutex, then delegates to the
/// ThreadUnsafeDWARFContextState implementation so that \p Work runs while the
/// lock is held. Returns the Error produced by \p Work.
Error doWorkThreadSafely(function_ref<Error()> Work) override {
std::unique_lock<std::recursive_mutex> LockGuard(Mutex);
return ThreadUnsafeDWARFContextState::doWorkThreadSafely(Work);
}
};
} // namespace

View File

@@ -496,108 +496,111 @@ void DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
}
Error DWARFUnit::tryExtractDIEsIfNeeded(bool CUDieOnly) {
if ((CUDieOnly && !DieArray.empty()) ||
DieArray.size() > 1)
return Error::success(); // Already parsed.
return Context.doWorkThreadSafely([&]() -> Error {
if ((CUDieOnly && !DieArray.empty()) || DieArray.size() > 1)
return Error::success(); // Already parsed.
bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
if (DieArray.empty())
return Error::success();
if (DieArray.empty())
return Error::success();
// If CU DIE was just parsed, copy several attribute values from it.
if (HasCUDie)
return Error::success();
// If CU DIE was just parsed, copy several attribute values from it.
if (HasCUDie)
return Error::success();
DWARFDie UnitDie(this, &DieArray[0]);
if (std::optional<uint64_t> DWOId =
toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
if (!IsDWO) {
assert(AddrOffsetSectionBase == std::nullopt);
assert(RangeSectionBase == 0);
assert(LocSectionBase == 0);
AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
if (!AddrOffsetSectionBase)
AddrOffsetSectionBase =
toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
}
DWARFDie UnitDie(this, &DieArray[0]);
if (std::optional<uint64_t> DWOId =
toUnsigned(UnitDie.find(DW_AT_GNU_dwo_id)))
Header.setDWOId(*DWOId);
if (!IsDWO) {
assert(AddrOffsetSectionBase == std::nullopt);
assert(RangeSectionBase == 0);
assert(LocSectionBase == 0);
AddrOffsetSectionBase = toSectionOffset(UnitDie.find(DW_AT_addr_base));
if (!AddrOffsetSectionBase)
AddrOffsetSectionBase =
toSectionOffset(UnitDie.find(DW_AT_GNU_addr_base));
RangeSectionBase = toSectionOffset(UnitDie.find(DW_AT_rnglists_base), 0);
LocSectionBase = toSectionOffset(UnitDie.find(DW_AT_loclists_base), 0);
}
// In general, in DWARF v5 and beyond we derive the start of the unit's
// contribution to the string offsets table from the unit DIE's
// DW_AT_str_offsets_base attribute. Split DWARF units do not use this
// attribute, so we assume that there is a contribution to the string
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
IsLittleEndian, 0);
if (IsDWO || getVersion() >= 5) {
auto StringOffsetOrError =
IsDWO ? determineStringOffsetsTableContributionDWO(DA)
: determineStringOffsetsTableContribution(DA);
if (!StringOffsetOrError)
return createStringError(errc::invalid_argument,
"invalid reference to or invalid content in "
".debug_str_offsets[.dwo]: " +
toString(StringOffsetOrError.takeError()));
// In general, in DWARF v5 and beyond we derive the start of the unit's
// contribution to the string offsets table from the unit DIE's
// DW_AT_str_offsets_base attribute. Split DWARF units do not use this
// attribute, so we assume that there is a contribution to the string
// offsets table starting at offset 0 of the debug_str_offsets.dwo section.
// In both cases we need to determine the format of the contribution,
// which may differ from the unit's format.
DWARFDataExtractor DA(Context.getDWARFObj(), StringOffsetSection,
IsLittleEndian, 0);
if (IsDWO || getVersion() >= 5) {
auto StringOffsetOrError =
IsDWO ? determineStringOffsetsTableContributionDWO(DA)
: determineStringOffsetsTableContribution(DA);
if (!StringOffsetOrError) {
return createStringError(errc::invalid_argument,
"invalid reference to or invalid content in "
".debug_str_offsets[.dwo]: " +
toString(StringOffsetOrError.takeError()));
}
StringOffsetsTableContribution = *StringOffsetOrError;
}
StringOffsetsTableContribution = *StringOffsetOrError;
}
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
// In case of DWP, the base offset from the index has to be added.
if (IsDWO) {
uint64_t ContributionBaseOffset = 0;
if (auto *IndexEntry = Header.getIndexEntry())
if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
ContributionBaseOffset = Contrib->getOffset();
setRangesSection(
&Context.getDWARFObj().getRnglistsDWOSection(),
ContributionBaseOffset +
DWARFListTableHeader::getHeaderSize(Header.getFormat()));
} else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
DWARFListTableHeader::getHeaderSize(
Header.getFormat())));
}
// DWARF v5 uses the .debug_rnglists and .debug_rnglists.dwo sections to
// describe address ranges.
if (getVersion() >= 5) {
// In case of DWP, the base offset from the index has to be added.
if (IsDWO) {
uint64_t ContributionBaseOffset = 0;
// If we are reading a package file, we need to adjust the location list
// data based on the index entries.
StringRef Data = Header.getVersion() >= 5
? Context.getDWARFObj().getLoclistsDWOSection().Data
: Context.getDWARFObj().getLocDWOSection().Data;
if (auto *IndexEntry = Header.getIndexEntry())
if (auto *Contrib = IndexEntry->getContribution(DW_SECT_RNGLISTS))
ContributionBaseOffset = Contrib->getOffset();
setRangesSection(
&Context.getDWARFObj().getRnglistsDWOSection(),
ContributionBaseOffset +
DWARFListTableHeader::getHeaderSize(Header.getFormat()));
} else
setRangesSection(&Context.getDWARFObj().getRnglistsSection(),
toSectionOffset(UnitDie.find(DW_AT_rnglists_base),
DWARFListTableHeader::getHeaderSize(
Header.getFormat())));
}
if (const auto *C = IndexEntry->getContribution(
Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
Data = Data.substr(C->getOffset(), C->getLength());
if (IsDWO) {
// If we are reading a package file, we need to adjust the location list
// data based on the index entries.
StringRef Data = Header.getVersion() >= 5
? Context.getDWARFObj().getLoclistsDWOSection().Data
: Context.getDWARFObj().getLocDWOSection().Data;
if (auto *IndexEntry = Header.getIndexEntry())
if (const auto *C = IndexEntry->getContribution(
Header.getVersion() >= 5 ? DW_SECT_LOCLISTS : DW_SECT_EXT_LOC))
Data = Data.substr(C->getOffset(), C->getLength());
DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
LocTable =
std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
} else if (getVersion() >= 5) {
LocTable = std::make_unique<DWARFDebugLoclists>(
DWARFDataExtractor(Context.getDWARFObj(),
Context.getDWARFObj().getLoclistsSection(),
IsLittleEndian, getAddressByteSize()),
getVersion());
} else {
LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
IsLittleEndian, getAddressByteSize()));
}
DWARFDataExtractor DWARFData(Data, IsLittleEndian, getAddressByteSize());
LocTable =
std::make_unique<DWARFDebugLoclists>(DWARFData, Header.getVersion());
LocSectionBase = DWARFListTableHeader::getHeaderSize(Header.getFormat());
} else if (getVersion() >= 5) {
LocTable = std::make_unique<DWARFDebugLoclists>(
DWARFDataExtractor(Context.getDWARFObj(),
Context.getDWARFObj().getLoclistsSection(),
IsLittleEndian, getAddressByteSize()),
getVersion());
} else {
LocTable = std::make_unique<DWARFDebugLoc>(DWARFDataExtractor(
Context.getDWARFObj(), Context.getDWARFObj().getLocSection(),
IsLittleEndian, getAddressByteSize()));
}
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
return Error::success();
return Error::success();
});
}
bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
@@ -652,15 +655,21 @@ bool DWARFUnit::parseDWO(StringRef DWOAlternativeLocation) {
return true;
}
void DWARFUnit::clearDIEs(bool KeepCUDie) {
// Do not use resize() + shrink_to_fit() to free memory occupied by dies.
// shrink_to_fit() is a *non-binding* request to reduce capacity() to size().
// It depends on the implementation whether the request is fulfilled.
// Create a new vector with a small capacity and assign it to the DieArray to
// have previous contents freed.
DieArray = (KeepCUDie && !DieArray.empty())
? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
: std::vector<DWARFDebugInfoEntry>();
void DWARFUnit::clearDIEs(bool KeepCUDie, bool KeepDWODies) {
assert(!Context.doWorkThreadSafely([&] {
if (!KeepDWODies && DWO) {
DWO->clearDIEs(KeepCUDie, KeepDWODies);
}
// Do not use resize() + shrink_to_fit() to free memory occupied by dies.
// shrink_to_fit() is a *non-binding* request to reduce capacity() to
// size(). It depends on the implementation whether the request is
// fulfilled. Create a new vector with a small capacity and assign it to the
// DieArray to have previous contents freed.
DieArray = (KeepCUDie && !DieArray.empty())
? std::vector<DWARFDebugInfoEntry>({DieArray[0]})
: std::vector<DWARFDebugInfoEntry>();
return Error::success();
}));
}
Expected<DWARFAddressRangesVector>

View File

@@ -656,6 +656,11 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
handleDie(Out, CUI, Die);
// Release the line table, once we're done.
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
}
} else {
// LLVM Dwarf parser is not thread-safe and we need to parse all DWARF up
@@ -668,12 +673,7 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
for (const auto &CU : DICtx.compile_units())
CU->getAbbreviations();
// Now parse all DIEs in case we have cross compile unit references in a
// thread pool.
DefaultThreadPool pool(hardware_concurrency(NumThreads));
for (const auto &CU : DICtx.compile_units())
pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
pool.wait();
// Now convert all DWARF to GSYM in a thread pool.
std::mutex LogMutex;
@@ -681,11 +681,15 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
DWARFDie Die = getDie(*CU);
if (Die) {
CUInfo CUI(DICtx, dyn_cast<DWARFCompileUnit>(CU.get()));
pool.async([this, CUI, &LogMutex, &Out, Die]() mutable {
pool.async([this, CUI, &CU, &LogMutex, &Out, Die]() mutable {
std::string storage;
raw_string_ostream StrStream(storage);
OutputAggregator ThreadOut(Out.GetOS() ? &StrStream : nullptr);
handleDie(ThreadOut, CUI, Die);
DICtx.clearLineTableForUnit(CU.get());
// Free any DIEs that were allocated by the DWARF parser.
// If/when they're needed by other CU's, they'll be recreated.
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
// Print ThreadLogStorage lines into an actual stream under a lock
std::lock_guard<std::mutex> guard(LogMutex);
if (Out.GetOS()) {
@@ -697,6 +701,9 @@ Error DwarfTransformer::convert(uint32_t NumThreads, OutputAggregator &Out) {
}
pool.wait();
}
// Now get rid of all the DIEs that may have been recreated
for (const auto &CU : DICtx.compile_units())
CU->clearDIEs(/*KeepCUDie=*/false, /*KeepDWODIEs=*/false);
size_t FunctionsAddedCount = Gsym.getNumFunctionInfos() - NumBefore;
Out << "Loaded " << FunctionsAddedCount << " functions from DWARF.\n";
return Error::success();