[PDB] One more fix for hasing GSI records.

The reference implementation uses a case-insensitive string
comparison for strings of equal length.  This will cause the
string "tEo" to compare less than "VUo".  However we were using
a case sensitive comparison, which would generate the opposite
outcome.  Switch to a case insensitive comparison.  Also, when
one of the strings contains non-ascii characters, fallback to
a straight memcmp.

The only way to really test this is with a DIA test.  Before this
patch, the test will fail (but succeed if link.exe is used instead
of lld-link).  After the patch, it succeeds even with lld-link.

llvm-svn: 336464
This commit is contained in:
Zachary Turner
2018-07-06 21:01:42 +00:00
parent 975c711358
commit 648bebdc67
6 changed files with 199 additions and 9 deletions

Binary file not shown.

View File

@@ -0,0 +1,81 @@
REQUIRES: diasdk
Input object file reconstruction:
; // foo.cpp
; void LJPwNRh() {}
; void HGfxvKdQO() {}
; void wuN() {}
; void tEo() {}
; void VUo() {}
; void teO() {}
; void bqSuLGQgWa() {}
; void SyJYcL() {}
; void OUV() {}
; void quH() {}
; void rbEaPKrlrRwk() {}
; void oet() {}
; void tuM() {}
; void LuU() {}
; void loxueqJLH() {}
; void QplRJuDs() {}
; void rWDokkLG() {}
; void sEH() {}
; void pui() {}
; void xoZvxw() {}
;
; int main(int argc, char **argv) {
; return 0;
; }
clang-cl /Z7 /GS- /GR- /c main.cpp /Foglobals-dia-func-collision3.obj
RUN: lld-link /debug /nodefaultlib /incremental:no /entry:main /out:%t.exe %S/Inputs/globals-dia-func-collision3.obj
RUN: llvm-pdbutil pretty -with-name=LuU -with-name=oet -with-name=OUV -with-name=pui \
RUN: -with-name=quH -with-name=sEH -with-name=teO -with-name=tEo \
RUN: -with-name=tuM -with-name=VUo -with-name=wuN -with-name=SyJYcL \
RUN: -with-name=xoZvxw -with-name=LJPwNRh -with-name=QplRJuDs -with-name=rWDokkLG \
RUN: -with-name=HGfxvKdQO -with-name=loxueqJLH -with-name=bqSuLGQgWa -with-name=rbEaPKrlrRwk \
RUN: %t.pdb | FileCheck %s
CHECK: [1 occurrences] - LuU
CHECK-NEXT: func [0x000010d0+ 0 - 0x000010d1- 1 | sizeof= 1] (FPO) void __cdecl LuU()
CHECK-NEXT: [1 occurrences] - oet
CHECK-NEXT: func [0x000010b0+ 0 - 0x000010b1- 1 | sizeof= 1] (FPO) void __cdecl oet()
CHECK-NEXT: [1 occurrences] - OUV
CHECK-NEXT: func [0x00001080+ 0 - 0x00001081- 1 | sizeof= 1] (FPO) void __cdecl OUV()
CHECK-NEXT: [1 occurrences] - pui
CHECK-NEXT: func [0x00001120+ 0 - 0x00001121- 1 | sizeof= 1] (FPO) void __cdecl pui()
CHECK-NEXT: [1 occurrences] - quH
CHECK-NEXT: func [0x00001090+ 0 - 0x00001091- 1 | sizeof= 1] (FPO) void __cdecl quH()
CHECK-NEXT: [1 occurrences] - sEH
CHECK-NEXT: func [0x00001110+ 0 - 0x00001111- 1 | sizeof= 1] (FPO) void __cdecl sEH()
CHECK-NEXT: [1 occurrences] - teO
CHECK-NEXT: func [0x00001050+ 0 - 0x00001051- 1 | sizeof= 1] (FPO) void __cdecl teO()
CHECK-NEXT: [1 occurrences] - tEo
CHECK-NEXT: func [0x00001030+ 0 - 0x00001031- 1 | sizeof= 1] (FPO) void __cdecl tEo()
CHECK-NEXT: [1 occurrences] - tuM
CHECK-NEXT: func [0x000010c0+ 0 - 0x000010c1- 1 | sizeof= 1] (FPO) void __cdecl tuM()
CHECK-NEXT: [1 occurrences] - VUo
CHECK-NEXT: func [0x00001040+ 0 - 0x00001041- 1 | sizeof= 1] (FPO) void __cdecl VUo()
CHECK-NEXT: [1 occurrences] - wuN
CHECK-NEXT: func [0x00001020+ 0 - 0x00001021- 1 | sizeof= 1] (FPO) void __cdecl wuN()
CHECK-NEXT: [1 occurrences] - SyJYcL
CHECK-NEXT: func [0x00001070+ 0 - 0x00001071- 1 | sizeof= 1] (FPO) void __cdecl SyJYcL()
CHECK-NEXT: [1 occurrences] - xoZvxw
CHECK-NEXT: func [0x00001130+ 0 - 0x00001131- 1 | sizeof= 1] (FPO) void __cdecl xoZvxw()
CHECK-NEXT: [1 occurrences] - LJPwNRh
CHECK-NEXT: func [0x00001000+ 0 - 0x00001001- 1 | sizeof= 1] (FPO) void __cdecl LJPwNRh()
CHECK-NEXT: [1 occurrences] - QplRJuDs
CHECK-NEXT: func [0x000010f0+ 0 - 0x000010f1- 1 | sizeof= 1] (FPO) void __cdecl QplRJuDs()
CHECK-NEXT: [1 occurrences] - rWDokkLG
CHECK-NEXT: func [0x00001100+ 0 - 0x00001101- 1 | sizeof= 1] (FPO) void __cdecl rWDokkLG()
CHECK-NEXT: [1 occurrences] - HGfxvKdQO
CHECK-NEXT: func [0x00001010+ 0 - 0x00001011- 1 | sizeof= 1] (FPO) void __cdecl HGfxvKdQO()
CHECK-NEXT: [1 occurrences] - loxueqJLH
CHECK-NEXT: func [0x000010e0+ 0 - 0x000010e1- 1 | sizeof= 1] (FPO) void __cdecl loxueqJLH()
CHECK-NEXT: [1 occurrences] - bqSuLGQgWa
CHECK-NEXT: func [0x00001060+ 0 - 0x00001061- 1 | sizeof= 1] (FPO) void __cdecl bqSuLGQgWa()
CHECK-NEXT: [1 occurrences] - rbEaPKrlrRwk
CHECK-NEXT: func [0x000010a0+ 0 - 0x000010a1- 1 | sizeof= 1] (FPO) void __cdecl rbEaPKrlrRwk()

View File

@@ -82,6 +82,26 @@ Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
return Error::success();
}
static bool isAsciiString(StringRef S) {
return llvm::all_of(S, [](char C) { return unsigned(C) < 0x80; });
}
// See `caseInsensitiveComparePchPchCchCch` in gsi.cpp
static bool gsiRecordLess(StringRef S1, StringRef S2) {
size_t LS = S1.size();
size_t RS = S2.size();
// Shorter strings always compare less than longer strings.
if (LS != RS)
return LS < RS;
// If either string contains non ascii characters, memcmp them.
if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
return memcmp(S1.data(), S2.data(), LS) < 0;
// Both strings are ascii, use memicmp.
return memicmp(S1.data(), S2.data(), LS) < 0;
}
void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
std::array<std::vector<std::pair<StringRef, PSHashRecord>>, IPHR_HASH + 1>
TmpBuckets;
@@ -118,17 +138,16 @@ void GSIHashStreamBuilder::finalizeBuckets(uint32_t RecordZeroOffset) {
ulittle32_t(HashRecords.size() * SizeOfHROffsetCalc);
HashBuckets.push_back(ChainStartOff);
// Sort each bucket by memcmp of the symbol's name.
// Sort each bucket by memcmp of the symbol's name. It's important that
// we use the same sorting algorithm as is used by the reference
// implementation to ensure that the search for a record within a bucket
// can properly early-out when it detects the record won't be found. The
// algorithm used here corredsponds to the function
// caseInsensitiveComparePchPchCchCch in the reference implementation.
std::sort(Bucket.begin(), Bucket.end(),
[](const std::pair<StringRef, PSHashRecord> &Left,
const std::pair<StringRef, PSHashRecord> &Right) {
size_t LS = Left.first.size();
size_t RS = Right.first.size();
if (LS < RS)
return true;
if (LS > RS)
return false;
return Left.first < Right.first;
return gsiRecordLess(Left.first, Right.first);
});
for (const auto &Entry : Bucket)

View File

@@ -35,7 +35,7 @@ void ExternalSymbolDumper::dump(const PDBSymbolPublicSymbol &Symbol) {
Printer.NewLine();
uint64_t Addr = Symbol.getVirtualAddress();
Printer << "[";
Printer << "public [";
WithColor(Printer, PDB_ColorItem::Address).get() << format_hex(Addr, 10);
Printer << "] ";
WithColor(Printer, PDB_ColorItem::Identifier).get() << LinkageName;

View File

@@ -20,10 +20,13 @@
#include "InputFile.h"
#include "LinePrinter.h"
#include "OutputStyle.h"
#include "PrettyClassDefinitionDumper.h"
#include "PrettyCompilandDumper.h"
#include "PrettyEnumDumper.h"
#include "PrettyExternalSymbolDumper.h"
#include "PrettyFunctionDumper.h"
#include "PrettyTypeDumper.h"
#include "PrettyTypedefDumper.h"
#include "PrettyVariableDumper.h"
#include "YAMLOutputStyle.h"
@@ -65,7 +68,11 @@
#include "llvm/DebugInfo/PDB/PDBSymbolData.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/PDBSymbolFunc.h"
#include "llvm/DebugInfo/PDB/PDBSymbolPublicSymbol.h"
#include "llvm/DebugInfo/PDB/PDBSymbolThunk.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeEnum.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeTypedef.h"
#include "llvm/DebugInfo/PDB/PDBSymbolTypeUDT.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/COM.h"
#include "llvm/Support/CommandLine.h"
@@ -161,6 +168,11 @@ cl::opt<bool> ShowInjectedSourceContent(
cl::desc("When displaying an injected source, display the file content"),
cl::cat(OtherOptions), cl::sub(PrettySubcommand));
cl::list<std::string> WithName(
"with-name",
cl::desc("Display any symbol or type with the specified exact name"),
cl::cat(TypeCategory), cl::ZeroOrMore, cl::sub(PrettySubcommand));
cl::opt<bool> Compilands("compilands", cl::desc("Display compilands"),
cl::cat(TypeCategory), cl::sub(PrettySubcommand));
cl::opt<bool> Symbols("module-syms",
@@ -963,6 +975,82 @@ static void dumpPretty(StringRef Path) {
outs() << "HasPrivateSymbols ";
Printer.Unindent();
if (!opts::pretty::WithName.empty()) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get()
<< "---SYMBOLS & TYPES BY NAME---";
for (StringRef Name : opts::pretty::WithName) {
auto Symbols = GlobalScope->findChildren(
PDB_SymType::None, Name, PDB_NameSearchFlags::NS_CaseSensitive);
if (!Symbols || Symbols->getChildCount() == 0) {
Printer.formatLine("[not found] - {0}", Name);
continue;
}
Printer.formatLine("[{0} occurrences] - {1}", Symbols->getChildCount(),
Name);
AutoIndent Indent(Printer);
Printer.NewLine();
while (auto Symbol = Symbols->getNext()) {
switch (Symbol->getSymTag()) {
case PDB_SymType::Typedef: {
TypedefDumper TD(Printer);
std::unique_ptr<PDBSymbolTypeTypedef> T =
llvm::unique_dyn_cast<PDBSymbolTypeTypedef>(std::move(Symbol));
TD.start(*T);
break;
}
case PDB_SymType::Enum: {
EnumDumper ED(Printer);
std::unique_ptr<PDBSymbolTypeEnum> E =
llvm::unique_dyn_cast<PDBSymbolTypeEnum>(std::move(Symbol));
ED.start(*E);
break;
}
case PDB_SymType::UDT: {
ClassDefinitionDumper CD(Printer);
std::unique_ptr<PDBSymbolTypeUDT> C =
llvm::unique_dyn_cast<PDBSymbolTypeUDT>(std::move(Symbol));
CD.start(*C);
break;
}
case PDB_SymType::BaseClass:
case PDB_SymType::Friend: {
TypeDumper TD(Printer);
Symbol->dump(TD);
break;
}
case PDB_SymType::Function: {
FunctionDumper FD(Printer);
std::unique_ptr<PDBSymbolFunc> F =
llvm::unique_dyn_cast<PDBSymbolFunc>(std::move(Symbol));
FD.start(*F, FunctionDumper::PointerType::None);
break;
}
case PDB_SymType::Data: {
VariableDumper VD(Printer);
std::unique_ptr<PDBSymbolData> D =
llvm::unique_dyn_cast<PDBSymbolData>(std::move(Symbol));
VD.start(*D);
break;
}
case PDB_SymType::PublicSymbol: {
ExternalSymbolDumper ED(Printer);
std::unique_ptr<PDBSymbolPublicSymbol> PS =
llvm::unique_dyn_cast<PDBSymbolPublicSymbol>(std::move(Symbol));
ED.dump(*PS);
break;
}
default:
llvm_unreachable("Unexpected symbol tag!");
}
}
}
llvm::outs().flush();
}
if (opts::pretty::Compilands) {
Printer.NewLine();
WithColor(Printer, PDB_ColorItem::SectionHeader).get()

View File

@@ -75,6 +75,8 @@ bool compareFunctionSymbols(
bool compareDataSymbols(const std::unique_ptr<llvm::pdb::PDBSymbolData> &F1,
const std::unique_ptr<llvm::pdb::PDBSymbolData> &F2);
extern llvm::cl::list<std::string> WithName;
extern llvm::cl::opt<bool> Compilands;
extern llvm::cl::opt<bool> Symbols;
extern llvm::cl::opt<bool> Globals;