Linker: Add support for relocation type PerThreadPayloadOffset

Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
Kacper Nowak
2022-08-16 15:17:10 +00:00
committed by Compute-Runtime-Automation
parent 5e58104f5a
commit 44d218e52e
3 changed files with 52 additions and 21 deletions

View File

@@ -318,7 +318,7 @@ void Linker::patchAddress(void *relocAddress, const uint64_t value, const Linker
}
}
void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals) {
void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors) {
if (false == data.getTraits().requiresPatchingOfInstructionSegments) {
return;
}
@@ -330,11 +330,15 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
auto &thisSegmentRelocs = *relocsIt;
const PatchableSegment &instSeg = *segIt;
for (const auto &relocation : thisSegmentRelocs) {
if (shouldIgnoreRelocation(relocation)) {
continue;
}
UNRECOVERABLE_IF(nullptr == instSeg.hostPointer);
bool invalidOffset = relocation.offset + addressSizeInBytes(relocation.type) > instSeg.segmentSize;
DEBUG_BREAK_IF(invalidOffset);
auto relocAddress = ptrOffset(instSeg.hostPointer, static_cast<uintptr_t>(relocation.offset));
if (relocation.type == LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset) {
*reinterpret_cast<uint32_t *>(relocAddress) = kernelDescriptors.at(segId)->kernelAttributes.crossThreadDataSize;
continue;
};
if (relocation.symbolName == implicitArgsRelocationSymbolName) {
if (pImplicitArgsRelocationAddresses.find(segId) == pImplicitArgsRelocationAddresses.end()) {
pImplicitArgsRelocationAddresses.insert({segId, {}});
@@ -343,17 +347,12 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
continue;
}
auto symbolIt = relocatedSymbols.find(relocation.symbolName);
bool invalidOffset = relocation.offset + addressSizeInBytes(relocation.type) > instSeg.segmentSize;
bool unresolvedExternal = (symbolIt == relocatedSymbols.end());
DEBUG_BREAK_IF(invalidOffset);
if (invalidOffset || unresolvedExternal) {
uint32_t segId = static_cast<uint32_t>(segIt - instructionsSegments.begin());
outUnresolvedExternals.push_back(UnresolvedExternal{relocation, segId, invalidOffset});
continue;
}
uint64_t patchValue = symbolIt->second.gpuAddress + relocation.addend;
patchAddress(relocAddress, patchValue, relocation);
}

View File

@@ -211,7 +211,7 @@ struct Linker {
if (!success) {
return LinkingStatus::Error;
}
patchInstructionsSegments(instructionsSegments, outUnresolvedExternals);
patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
outUnresolvedExternals, pDevice, constantsInitData, variablesInitData);
resolveImplicitArgs(kernelDescriptors, pDevice);
@@ -241,7 +241,7 @@ struct Linker {
bool processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings);
void patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals);
void patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors);
void patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
@@ -260,9 +260,6 @@ struct Linker {
std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames);
std::string constructRelocationsDebugMessage(const Linker::RelocatedSymbolsMap &relocatedSymbols);
// Reports whether the given relocation is of type PerThreadPayloadOffset.
constexpr bool shouldIgnoreRelocation(const LinkerInput::RelocationInfo &relocation) {
    return relocation.type == LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset;
}
// Tells whether the segment type is one of the two global data segments
// (GlobalVariables or GlobalConstants).
inline bool isDataSegment(const SegmentType &segment) {
    const bool isGlobalConstants = (SegmentType::GlobalConstants == segment);
    const bool isGlobalVariables = (SegmentType::GlobalVariables == segment);
    return isGlobalConstants || isGlobalVariables;
}

View File

@@ -1252,12 +1252,12 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
relocCPartLow.r_offset = 36;
relocCPartLow.r_type = vISA::GenRelocType::R_SYM_ADDR_32;
vISA::GenRelocEntry relocIgnore = {};
relocIgnore.r_symbol[0] = 'X';
relocIgnore.r_offset = 36;
relocIgnore.r_type = vISA::GenRelocType::R_PER_THREAD_PAYLOAD_OFFSET_32;
vISA::GenRelocEntry relocPerThreadPayloadOffset = {};
relocPerThreadPayloadOffset.r_symbol[0] = 'X';
relocPerThreadPayloadOffset.r_offset = 44;
relocPerThreadPayloadOffset.r_type = vISA::GenRelocType::R_PER_THREAD_PAYLOAD_OFFSET_32;
vISA::GenRelocEntry relocs[] = {relocA, relocB, relocC, relocCPartHigh, relocCPartLow, relocIgnore};
vISA::GenRelocEntry relocs[] = {relocA, relocB, relocC, relocCPartHigh, relocCPartLow, relocPerThreadPayloadOffset};
constexpr uint32_t numRelocations = sizeof(relocs) / sizeof(relocs[0]);
bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0);
EXPECT_TRUE(decodeRelocSuccess);
@@ -1284,6 +1284,10 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
NEO::Linker::KernelDescriptorsT kernelDescriptors;
NEO::Linker::ExternalFunctionsT externalFunctions;
KernelDescriptor kd;
kd.kernelAttributes.crossThreadDataSize = 0x20;
kernelDescriptors.push_back(&kd);
auto linkResult = linker.link(
globalVarSegment, globalConstSegment, exportedFuncSegment, {},
patchableGlobalVarSeg, patchableConstVarSeg, patchableInstructionSegments, unresolvedExternals,
@@ -1295,7 +1299,7 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
ASSERT_EQ(1U, relocatedSymbols.count(symGlobalVariable.s_name));
ASSERT_EQ(1U, relocatedSymbols.count(symGlobalConstant.s_name));
ASSERT_EQ(1U, relocatedSymbols.count(symGlobalVariable.s_name));
ASSERT_EQ(1U, relocatedSymbols.count(symExportedFunc.s_name));
EXPECT_EQ(relocatedSymbols[symGlobalVariable.s_name].gpuAddress, globalVarSegment.gpuAddress + symGlobalVariable.s_offset);
EXPECT_EQ(relocatedSymbols[symGlobalConstant.s_name].gpuAddress, globalConstSegment.gpuAddress + symGlobalConstant.s_offset);
@@ -1315,6 +1319,9 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
EXPECT_EQ(funcAddressHigh, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset));
EXPECT_EQ(initData, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset - sizeof(uint32_t)));
EXPECT_EQ(initData, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset + sizeof(uint32_t)));
auto perThreadPayloadOffsetPatchedValue = *reinterpret_cast<uint32_t *>(instructionSegment.data() + relocPerThreadPayloadOffset.r_offset);
EXPECT_EQ(kd.kernelAttributes.crossThreadDataSize, perThreadPayloadOffsetPatchedValue);
}
TEST(LinkerTests, givenInvalidSymbolOffsetWhenPatchingInstructionsThenRelocationFails) {
@@ -2453,7 +2460,8 @@ TEST(LinkerTests, givenRelaWhenPatchingInstructionsSegmentThenAddendIsAdded) {
segmentToPatch.segmentSize = sizeof(segmentData);
NEO::Linker::UnresolvedExternals unresolvedExternals;
linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals);
NEO::Linker::KernelDescriptorsT kernelDescriptors;
linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);
EXPECT_EQ(static_cast<uint64_t>(rela.addend + symValue), segmentData);
}
@@ -2484,3 +2492,30 @@ TEST(LinkerTests, givenRelaWhenPatchingDataSegmentThenAddendIsAdded) {
linker.patchDataSegments({}, globalConstantsSegmentInfo, {}, &globalConstantsPatchableSegment, unresolvedExternals, device.get(), &globalConstantSegmentData, nullptr);
EXPECT_EQ(static_cast<uint64_t>(rela.addend + symValue), globalConstantSegmentData);
}
// A PerThreadPayloadOffset relocation in an instruction segment must be patched
// with the crossThreadDataSize taken from the kernel descriptor at that segment's index.
TEST(LinkerTests, givenPerThreadPayloadOffsetRelocationWhenPatchingInstructionSegmentsThenPatchItWithCTDSize) {
    // Kernel descriptor whose cross-thread data size is the expected patch value.
    KernelDescriptor kd;
    kd.kernelAttributes.crossThreadDataSize = 0x20;
    NEO::Linker::KernelDescriptorsT kernelDescriptors;
    kernelDescriptors.push_back(&kd);

    // Single relocation placed 4 bytes into the segment.
    NEO::LinkerInput::RelocationInfo payloadOffsetReloc;
    payloadOffsetReloc.relocationSegment = NEO::SegmentType::Instructions;
    payloadOffsetReloc.type = NEO::LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset;
    payloadOffsetReloc.offset = 0x4;

    WhiteBox<NEO::LinkerInput> linkerInput;
    linkerInput.traits.requiresPatchingOfInstructionSegments = true;
    linkerInput.textRelocations.push_back({payloadOffsetReloc});
    WhiteBox<NEO::Linker> linker(linkerInput);

    // A zero-initialized uint64_t serves as the backing storage of the instruction segment.
    uint64_t segmentData{0};
    NEO::Linker::PatchableSegment segmentToPatch;
    segmentToPatch.segmentSize = sizeof(segmentData);
    segmentToPatch.hostPointer = reinterpret_cast<void *>(&segmentData);

    NEO::Linker::UnresolvedExternals unresolvedExternals;
    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);

    // Read back the 32-bit value written at the relocation offset.
    auto patchedLocation = ptrOffset(segmentToPatch.hostPointer, static_cast<size_t>(payloadOffsetReloc.offset));
    const uint32_t patchedValue = *reinterpret_cast<uint32_t *>(patchedLocation);
    EXPECT_EQ(kd.kernelAttributes.crossThreadDataSize, patchedValue);
}