From 4116dd5c9ec8349e2cf37999a16aee9f0265ca60 Mon Sep 17 00:00:00 2001 From: "Chodor, Jaroslaw" Date: Mon, 20 May 2024 19:39:24 +0000 Subject: [PATCH] feature: Improving elf rewriter - preserving strings Original string section needs to be preserved so that symbol table does not get broken. Related-To: NEO-10190 Signed-off-by: Chodor, Jaroslaw --- .../device_binary_format/elf/elf_encoder.h | 30 +++++++++++++ .../device_binary_format/elf/elf_rewriter.h | 43 +++++++++++-------- .../elf/elf_encoder_tests.cpp | 25 ++++++++++- .../elf/elf_rewriter_tests.cpp | 38 +++++++++++++--- 4 files changed, 109 insertions(+), 27 deletions(-) diff --git a/shared/source/device_binary_format/elf/elf_encoder.h b/shared/source/device_binary_format/elf/elf_encoder.h index 77fe04ace1..3a5e51266f 100644 --- a/shared/source/device_binary_format/elf/elf_encoder.h +++ b/shared/source/device_binary_format/elf/elf_encoder.h @@ -13,6 +13,7 @@ #include #include +#include #include namespace NEO { @@ -25,15 +26,39 @@ struct StringSectionBuilder { undefStringIdx = 0U; } + void setInitialStringsTab(ArrayRef data) { + DEBUG_BREAK_IF(stringTable.size() > 1); + stringTable.assign(reinterpret_cast(data.begin()), reinterpret_cast(data.end())); + if (stringTable.size() < 1) { + stringTable.push_back('\0'); + } + if (*stringTable.rbegin() != '\0') { + stringTable.push_back('\0'); + } + auto it = stringTable.begin() + 1; + while (it != stringTable.end()) { + stringOffsetsMap[std::string(&*it)] = static_cast(it - stringTable.begin()); + while (*it != '\0') { + ++it; + } + ++it; + } + } + uint32_t appendString(ConstStringRef str) { if (str.empty()) { return undefStringIdx; } + auto existingEntry = stringOffsetsMap.find(str.str()); + if (stringOffsetsMap.end() != existingEntry) { + return existingEntry->second; + } uint32_t offset = static_cast(stringTable.size()); stringTable.insert(stringTable.end(), str.begin(), str.end()); if (str[str.size() - 1] != '\0') { stringTable.push_back('\0'); } + stringOffsetsMap[str.str()] = offset; return offset; } @@ -47,6 +72,7 @@ struct StringSectionBuilder { protected: std::vector stringTable; + std::unordered_map stringOffsetsMap; uint32_t undefStringIdx; }; @@ -55,6 +81,10 @@ struct ElfEncoder { ElfEncoder(bool addUndefSectionHeader = true, bool addHeaderSectionNamesSection = true, typename ElfSectionHeaderTypes::AddrAlign defaultDataAlignment = 8U); + void setInitialStringsTab(ArrayRef data) { + strSecBuilder.setInitialStringsTab(data); + } + ElfSectionHeader &appendSection(const ElfSectionHeader §ionHeader, const ArrayRef sectionData); ElfProgramHeader &appendSegment(const ElfProgramHeader &programHeader, const ArrayRef segmentData); diff --git a/shared/source/device_binary_format/elf/elf_rewriter.h b/shared/source/device_binary_format/elf/elf_rewriter.h index 14b4a2af91..975ca27200 100644 --- a/shared/source/device_binary_format/elf/elf_rewriter.h +++ b/shared/source/device_binary_format/elf/elf_rewriter.h @@ -22,37 +22,37 @@ namespace NEO { namespace Elf { -template +template struct MutableSectionHeader { - MutableSectionHeader(const std::string &name, const NEO::Elf::ElfSectionHeader &header, const std::vector &data) + MutableSectionHeader(const std::string &name, const NEO::Elf::ElfSectionHeader &header, const std::vector &data) : name(name), header(header), data(data) { } std::string name; - NEO::Elf::ElfSectionHeader header{}; + NEO::Elf::ElfSectionHeader header{}; std::vector data; }; -template +template struct MutableProgramHeader { - MutableProgramHeader(const NEO::Elf::ElfProgramHeader &header, const std::vector &data) + MutableProgramHeader(const NEO::Elf::ElfProgramHeader &header, const std::vector &data) : header(header), data(data) { } - NEO::Elf::ElfProgramHeader header = {}; + NEO::Elf::ElfProgramHeader header = {}; std::vector data; - MutableSectionHeader *referencedSectionData = nullptr; + MutableSectionHeader *referencedSectionData = nullptr; }; -template +template struct ElfRewriter { using SectionId = uint32_t; - ElfRewriter(NEO::Elf::Elf &src) { + ElfRewriter(NEO::Elf::Elf &src) { elfFileHeader = *src.elfFileHeader; for (const auto &sh : src.sectionHeaders) { - this->sectionHeaders.push_back(std::make_unique>(src.getName(sh.header->name), *sh.header, std::vector{sh.data.begin(), sh.data.end()})); + this->sectionHeaders.push_back(std::make_unique>(src.getName(sh.header->name), *sh.header, std::vector{sh.data.begin(), sh.data.end()})); } for (const auto &ph : src.programHeaders) { - this->programHeaders.push_back(std::make_unique>(*ph.header, std::vector{ph.data.begin(), ph.data.end()})); + this->programHeaders.push_back(std::make_unique>(*ph.header, std::vector{ph.data.begin(), ph.data.end()})); for (const auto &sh : this->sectionHeaders) { if ((sh->header.offset == ph.header->offset) && (sh->header.size == ph.header->fileSz)) { (*this->programHeaders.rbegin())->referencedSectionData = sh.get(); @@ -62,15 +62,20 @@ struct ElfRewriter { } std::vector encode() const { - NEO::Elf::ElfEncoder encoder; + NEO::Elf::ElfEncoder encoder{}; + for (const auto &sh : this->sectionHeaders) { + if (sh->header.type == SHT_STRTAB) { + encoder.setInitialStringsTab(sh->data); + } + } encoder.getElfFileHeader() = elfFileHeader; - std::unordered_map *, decltype(NEO::Elf::ElfSectionHeader::offset)> encodedSectionsOffsets; + std::unordered_map *, decltype(NEO::Elf::ElfSectionHeader::offset)> encodedSectionsOffsets; for (const auto &sh : this->sectionHeaders) { if ((sh->header.type == SHT_NULL) || (sh->header.type == SHT_STRTAB)) { continue; } auto nameIdx = encoder.appendSectionName(sh->name); - NEO::Elf::ElfSectionHeader header = sh->header; + NEO::Elf::ElfSectionHeader header = sh->header; header.name = nameIdx; encodedSectionsOffsets[sh.get()] = encoder.appendSection(header, sh->data).offset; } @@ -86,7 +91,7 @@ struct ElfRewriter { return encoder.encode(); } - StackVec findSections(typename ElfSectionHeaderTypes::Type type, ConstStringRef name) { + StackVec findSections(typename ElfSectionHeaderTypes::Type type, ConstStringRef name) { StackVec ret; for (size_t i = 0; i < this->sectionHeaders.size(); i++) { auto §ion = this->sectionHeaders[i]; @@ -97,7 +102,7 @@ struct ElfRewriter { return ret; } - MutableSectionHeader &getSection(SectionId idx) { + MutableSectionHeader &getSection(SectionId idx) { return *sectionHeaders[idx]; } @@ -114,11 +119,11 @@ struct ElfRewriter { } } - ElfFileHeader elfFileHeader = {}; + ElfFileHeader elfFileHeader = {}; protected: - StackVec>, 32> sectionHeaders; - StackVec>, 32> programHeaders; + StackVec>, 32> sectionHeaders; + StackVec>, 32> programHeaders; }; } // namespace Elf diff --git a/shared/test/unit_test/device_binary_format/elf/elf_encoder_tests.cpp b/shared/test/unit_test/device_binary_format/elf/elf_encoder_tests.cpp index 383bca814c..79a2684a94 100644 --- a/shared/test/unit_test/device_binary_format/elf/elf_encoder_tests.cpp +++ b/shared/test/unit_test/device_binary_format/elf/elf_encoder_tests.cpp @@ -501,12 +501,33 @@ TEST(ElfEncoder, WhenGetSectionHeaderIndexIsCalledThenCorrectSectionIdxIsReturne EXPECT_EQ(1U, elfEncoder64.getSectionHeaderIndex(sec1)); } -TEST(DecodeElfNoteSection, givenZeroNotesToEncodeThenReturnsEmptyDataVector) { +TEST(ElfEncoder, givenEmptyArrayWhenSetInitialStringTabIsUsedThenResetsStringTabToEmpty) { + ElfEncoder elfEncoder; + elfEncoder.setInitialStringsTab({}); + EXPECT_EQ(0U, elfEncoder.appendSectionName({})); +} + +TEST(ElfEncoder, givenUnterminatedArrayWhenSetInitialStringTabIsUsedThenTerminateUnderlyingArray) { + ElfEncoder elfEncoder; + char data[2] = {'\0', 's'}; + elfEncoder.setInitialStringsTab(ArrayRef::fromAny(data, sizeof(data))); + EXPECT_EQ(1U, elfEncoder.appendSectionName("s")); +} + +TEST(ElfEncoder, givenArrayWhenSetInitialStringTabIsUsedThenIncorporateItToInternalStringTab) { + ElfEncoder elfEncoder; + char data[] = "\0string0\0string1"; + elfEncoder.setInitialStringsTab(ArrayRef::fromAny(data, sizeof(data))); + EXPECT_EQ(1U, elfEncoder.appendSectionName("string0")); + EXPECT_EQ(9U, elfEncoder.appendSectionName("string1")); +} + +TEST(EncodeElfNoteSection, givenZeroNotesToEncodeThenReturnsEmptyDataVector) { auto encoded = NEO::Elf::encodeNoteSectionData({}); EXPECT_TRUE(encoded.empty()); } -TEST(DecodeElfNoteSection, givenValidNotesToEncodeThenReturnsProperlyEncodedData) { +TEST(EncodeElfNoteSection, givenValidNotesToEncodeThenReturnsProperlyEncodedData) { std::string unalignedDescName = "note" "Type"; std::string unalignedDesc = "some" diff --git a/shared/test/unit_test/device_binary_format/elf/elf_rewriter_tests.cpp b/shared/test/unit_test/device_binary_format/elf/elf_rewriter_tests.cpp index fb42202134..296eb7c704 100644 --- a/shared/test/unit_test/device_binary_format/elf/elf_rewriter_tests.cpp +++ b/shared/test/unit_test/device_binary_format/elf/elf_rewriter_tests.cpp @@ -18,18 +18,18 @@ inline bool operator==(const ElfFileHeaderIdentity &lhs, const ElfFileHeaderIden return (lhs.abiVersion == rhs.abiVersion) && (lhs.data == rhs.data) && (lhs.eClass == rhs.eClass) && (lhs.osAbi == rhs.osAbi) && (lhs.version == rhs.version) && (lhs.magic[0] == rhs.magic[0]) && (lhs.magic[1] == rhs.magic[1]) && (lhs.magic[2] == rhs.magic[2]) && (lhs.magic[3] == rhs.magic[3]); } -template -inline bool operator==(const ElfFileHeader &lhs, const ElfFileHeader &rhs) { +template +inline bool operator==(const ElfFileHeader &lhs, const ElfFileHeader &rhs) { return (lhs.identity == rhs.identity) && (lhs.type == rhs.type) && (lhs.machine == rhs.machine) && (lhs.version == rhs.version) && (lhs.entry == rhs.entry) && (lhs.phOff == rhs.phOff) && (lhs.shOff == rhs.shOff) && (lhs.flags == rhs.flags) && (lhs.ehSize == rhs.ehSize) && (lhs.phEntSize == rhs.phEntSize) && (lhs.phNum == rhs.phNum) && (lhs.shEntSize == rhs.shEntSize) && (lhs.shNum == rhs.shNum) && (lhs.shStrNdx == rhs.shStrNdx); } -template -inline bool operator==(const SectionHeaderAndData &lhs, const SectionHeaderAndData &rhs) { +template +inline bool operator==(const SectionHeaderAndData &lhs, const SectionHeaderAndData &rhs) { return (lhs.header->name == rhs.header->name) && (lhs.header->type == rhs.header->type) && (lhs.header->flags == rhs.header->flags) && (lhs.header->addr == rhs.header->addr) && (lhs.header->offset == rhs.header->offset) && (lhs.header->size == rhs.header->size) && (lhs.header->link == rhs.header->link) && (lhs.header->info == rhs.header->info) && (lhs.header->addralign == rhs.header->addralign) && (lhs.header->entsize == rhs.header->entsize) && (lhs.data.size() == rhs.data.size()) && ((lhs.data.size() == 0) || (0 == memcmp(lhs.data.begin(), rhs.data.begin(), lhs.data.size()))); } -template -inline bool operator==(const ProgramHeaderAndData &lhs, const ProgramHeaderAndData &rhs) { +template +inline bool operator==(const ProgramHeaderAndData &lhs, const ProgramHeaderAndData &rhs) { return (lhs.header->type == rhs.header->type) && (lhs.header->offset == rhs.header->offset) && (lhs.header->vAddr == rhs.header->vAddr) && (lhs.header->pAddr == rhs.header->pAddr) && (lhs.header->fileSz == rhs.header->fileSz) && (lhs.header->memSz == rhs.header->memSz) && (lhs.header->flags == rhs.header->flags) && (lhs.header->align == rhs.header->align) && (lhs.data.size() == rhs.data.size()) && ((lhs.data.size() == 0) || (0 == memcmp(lhs.data.begin(), rhs.data.begin(), lhs.data.size()))); } @@ -122,3 +122,29 @@ TEST(ElfRewriterRemoveSection, GivenSectionIndexThenRemovesThatSection) { EXPECT_TRUE(err.empty()) << err; EXPECT_TRUE(warn.empty()) << warn; } + +TEST(ElfRewriter, GivenElfThenPreservesNamesOffsets) { + NEO::Elf::ElfEncoder<> encoder; + std::vector txtData; + txtData.resize(4096U, 7); + auto name0Idx = encoder.appendSectionName("name0"); + auto &txtSection = encoder.appendSection(NEO::Elf::SectionHeaderType::SHT_PROGBITS, ".txt", txtData); + auto name2Idx = encoder.appendSectionName("name2"); + encoder.appendProgramHeaderLoad(encoder.getSectionHeaderIndex(txtSection), 4096U, 4096U); + auto elfBinSrc = encoder.encode(); + + std::string err, warn; + auto decodedElfSrc = NEO::Elf::decodeElf(elfBinSrc, err, warn); + ASSERT_TRUE(err.empty()) << err; + ASSERT_TRUE(warn.empty()) << warn; + NEO::Elf::ElfRewriter<> rewriter{decodedElfSrc}; + auto elfBinOut = rewriter.encode(); + + ASSERT_EQ(elfBinSrc.size(), elfBinOut.size()); + auto decodedElfOut = NEO::Elf::decodeElf(elfBinOut, err, warn); + ASSERT_TRUE(err.empty()) << err; + ASSERT_TRUE(warn.empty()) << warn; + EXPECT_EQ(1U, rewriter.findSections(NEO::Elf::SectionHeaderType::SHT_PROGBITS, ".txt").size()); + EXPECT_EQ("name0", decodedElfOut.getName(name0Idx)); + EXPECT_EQ("name2", decodedElfOut.getName(name2Idx)); +} \ No newline at end of file