Mirror of https://github.com/intel/compute-runtime.git
Linker: Add support for relocation type PerThreadPayloadOffset
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
Committed by: Compute-Runtime-Automation
Parent: 5e58104f5a
Commit: 44d218e52e
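In short: when the linker walks the relocations of an instruction (text) segment and finds one of type PerThreadPayloadOffset, it no longer ignores it; it writes the owning kernel's crossThreadDataSize as a 32-bit value at the relocation offset, which corresponds to where the per-thread payload starts after the cross-thread data. Below is a minimal, self-contained sketch of just that patching step; the types and names are simplified stand-ins for the NEO ones (LinkerInput::RelocationInfo, KernelDescriptor, PatchableSegment), not the real headers.

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Simplified stand-ins for the NEO types involved (illustrative only).
enum class RelocType { Address,
                       PerThreadPayloadOffset };

struct RelocationInfo {
    RelocType type;
    uint64_t offset; // byte offset into the instruction segment
};

struct KernelAttributes {
    uint32_t crossThreadDataSize; // per-thread payload starts right after the cross-thread data
};

// Patch a PerThreadPayloadOffset relocation: write the kernel's 32-bit
// cross-thread data size at the relocation offset inside the instruction segment.
void patchPerThreadPayloadOffset(std::vector<uint8_t> &segment, const RelocationInfo &reloc,
                                 const KernelAttributes &attrs) {
    if (reloc.type != RelocType::PerThreadPayloadOffset) {
        return; // other relocation types are resolved against symbols instead
    }
    if (reloc.offset + sizeof(uint32_t) > segment.size()) {
        return; // out-of-bounds offset; the real linker reports this as an unresolved external
    }
    std::memcpy(segment.data() + reloc.offset, &attrs.crossThreadDataSize, sizeof(uint32_t));
}

int main() {
    std::vector<uint8_t> instructionSegment(16, 0);
    RelocationInfo reloc{RelocType::PerThreadPayloadOffset, 0x4}; // relocation at byte offset 4
    KernelAttributes attrs{0x20};                                 // cross-thread data is 32 bytes

    patchPerThreadPayloadOffset(instructionSegment, reloc, attrs);

    uint32_t patched = 0;
    std::memcpy(&patched, instructionSegment.data() + reloc.offset, sizeof(patched));
    std::cout << "patched value: 0x" << std::hex << patched << "\n"; // prints 0x20
    return 0;
}

This mirrors what the new unit test at the end of the diff checks: a PerThreadPayloadOffset relocation at offset 0x4 of an 8-byte segment ends up holding crossThreadDataSize (0x20).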
@@ -318,7 +318,7 @@ void Linker::patchAddress(void *relocAddress, const uint64_t value, const Linker
     }
 }
 
-void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals) {
+void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors) {
     if (false == data.getTraits().requiresPatchingOfInstructionSegments) {
         return;
     }
@@ -330,11 +330,15 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
         auto &thisSegmentRelocs = *relocsIt;
         const PatchableSegment &instSeg = *segIt;
         for (const auto &relocation : thisSegmentRelocs) {
-            if (shouldIgnoreRelocation(relocation)) {
-                continue;
-            }
             UNRECOVERABLE_IF(nullptr == instSeg.hostPointer);
+            bool invalidOffset = relocation.offset + addressSizeInBytes(relocation.type) > instSeg.segmentSize;
+            DEBUG_BREAK_IF(invalidOffset);
+
             auto relocAddress = ptrOffset(instSeg.hostPointer, static_cast<uintptr_t>(relocation.offset));
+            if (relocation.type == LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset) {
+                *reinterpret_cast<uint32_t *>(relocAddress) = kernelDescriptors.at(segId)->kernelAttributes.crossThreadDataSize;
+                continue;
+            }
             if (relocation.symbolName == implicitArgsRelocationSymbolName) {
                 if (pImplicitArgsRelocationAddresses.find(segId) == pImplicitArgsRelocationAddresses.end()) {
                     pImplicitArgsRelocationAddresses.insert({segId, {}});
@@ -343,17 +347,12 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
                 continue;
             }
             auto symbolIt = relocatedSymbols.find(relocation.symbolName);
-
-            bool invalidOffset = relocation.offset + addressSizeInBytes(relocation.type) > instSeg.segmentSize;
             bool unresolvedExternal = (symbolIt == relocatedSymbols.end());
-
-            DEBUG_BREAK_IF(invalidOffset);
             if (invalidOffset || unresolvedExternal) {
                 uint32_t segId = static_cast<uint32_t>(segIt - instructionsSegments.begin());
                 outUnresolvedExternals.push_back(UnresolvedExternal{relocation, segId, invalidOffset});
                 continue;
             }
 
             uint64_t patchValue = symbolIt->second.gpuAddress + relocation.addend;
             patchAddress(relocAddress, patchValue, relocation);
         }

@@ -211,7 +211,7 @@ struct Linker {
         if (!success) {
             return LinkingStatus::Error;
         }
-        patchInstructionsSegments(instructionsSegments, outUnresolvedExternals);
+        patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
         patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
                           outUnresolvedExternals, pDevice, constantsInitData, variablesInitData);
         resolveImplicitArgs(kernelDescriptors, pDevice);
@@ -241,7 +241,7 @@ struct Linker {
 
     bool processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings);
 
-    void patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals);
+    void patchInstructionsSegments(const std::vector<PatchableSegment> &instructionsSegments, std::vector<UnresolvedExternal> &outUnresolvedExternals, const KernelDescriptorsT &kernelDescriptors);
 
     void patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
                            GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
@@ -260,9 +260,6 @@ struct Linker {
 
 std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames);
 std::string constructRelocationsDebugMessage(const Linker::RelocatedSymbolsMap &relocatedSymbols);
-constexpr bool shouldIgnoreRelocation(const LinkerInput::RelocationInfo &relocation) {
-    return LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset == relocation.type;
-}
 inline bool isDataSegment(const SegmentType &segment) {
     return segment == SegmentType::GlobalConstants || segment == SegmentType::GlobalVariables;
 }

@@ -1252,12 +1252,12 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
     relocCPartLow.r_offset = 36;
     relocCPartLow.r_type = vISA::GenRelocType::R_SYM_ADDR_32;
 
-    vISA::GenRelocEntry relocIgnore = {};
-    relocIgnore.r_symbol[0] = 'X';
-    relocIgnore.r_offset = 36;
-    relocIgnore.r_type = vISA::GenRelocType::R_PER_THREAD_PAYLOAD_OFFSET_32;
+    vISA::GenRelocEntry relocPerThreadPayloadOffset = {};
+    relocPerThreadPayloadOffset.r_symbol[0] = 'X';
+    relocPerThreadPayloadOffset.r_offset = 44;
+    relocPerThreadPayloadOffset.r_type = vISA::GenRelocType::R_PER_THREAD_PAYLOAD_OFFSET_32;
 
-    vISA::GenRelocEntry relocs[] = {relocA, relocB, relocC, relocCPartHigh, relocCPartLow, relocIgnore};
+    vISA::GenRelocEntry relocs[] = {relocA, relocB, relocC, relocCPartHigh, relocCPartLow, relocPerThreadPayloadOffset};
     constexpr uint32_t numRelocations = sizeof(relocs) / sizeof(relocs[0]);
     bool decodeRelocSuccess = linkerInput.decodeRelocationTable(&relocs, numRelocations, 0);
     EXPECT_TRUE(decodeRelocSuccess);
@@ -1284,6 +1284,10 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
     NEO::Linker::KernelDescriptorsT kernelDescriptors;
     NEO::Linker::ExternalFunctionsT externalFunctions;
 
+    KernelDescriptor kd;
+    kd.kernelAttributes.crossThreadDataSize = 0x20;
+    kernelDescriptors.push_back(&kd);
+
     auto linkResult = linker.link(
         globalVarSegment, globalConstSegment, exportedFuncSegment, {},
         patchableGlobalVarSeg, patchableConstVarSeg, patchableInstructionSegments, unresolvedExternals,
@@ -1295,7 +1299,7 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
 
-    ASSERT_EQ(1U, relocatedSymbols.count(symGlobalVariable.s_name));
+    ASSERT_EQ(1U, relocatedSymbols.count(symGlobalConstant.s_name));
     ASSERT_EQ(1U, relocatedSymbols.count(symGlobalVariable.s_name));
     ASSERT_EQ(1U, relocatedSymbols.count(symExportedFunc.s_name));
 
     EXPECT_EQ(relocatedSymbols[symGlobalVariable.s_name].gpuAddress, globalVarSegment.gpuAddress + symGlobalVariable.s_offset);
     EXPECT_EQ(relocatedSymbols[symGlobalConstant.s_name].gpuAddress, globalConstSegment.gpuAddress + symGlobalConstant.s_offset);
@@ -1315,6 +1319,9 @@ TEST(LinkerTests, givenValidSymbolsAndRelocationsThenInstructionSegmentsArePrope
     EXPECT_EQ(funcAddressHigh, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset));
     EXPECT_EQ(initData, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset - sizeof(uint32_t)));
     EXPECT_EQ(initData, *reinterpret_cast<const uint32_t *>(instructionSegment.data() + relocCPartHigh.r_offset + sizeof(uint32_t)));
+
+    auto perThreadPayloadOffsetPatchedValue = *reinterpret_cast<uint32_t *>(instructionSegment.data() + relocPerThreadPayloadOffset.r_offset);
+    EXPECT_EQ(kd.kernelAttributes.crossThreadDataSize, perThreadPayloadOffsetPatchedValue);
 }
 
 TEST(LinkerTests, givenInvalidSymbolOffsetWhenPatchingInstructionsThenRelocationFails) {
@@ -2453,7 +2460,8 @@ TEST(LinkerTests, givenRelaWhenPatchingInstructionsSegmentThenAddendIsAdded) {
     segmentToPatch.segmentSize = sizeof(segmentData);
 
     NEO::Linker::UnresolvedExternals unresolvedExternals;
-    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals);
+    NEO::Linker::KernelDescriptorsT kernelDescriptors;
+    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);
     EXPECT_EQ(static_cast<uint64_t>(rela.addend + symValue), segmentData);
 }
 
@@ -2484,3 +2492,30 @@ TEST(LinkerTests, givenRelaWhenPatchingDataSegmentThenAddendIsAdded) {
     linker.patchDataSegments({}, globalConstantsSegmentInfo, {}, &globalConstantsPatchableSegment, unresolvedExternals, device.get(), &globalConstantSegmentData, nullptr);
     EXPECT_EQ(static_cast<uint64_t>(rela.addend + symValue), globalConstantSegmentData);
 }
+
+TEST(LinkerTests, givenPerThreadPayloadOffsetRelocationWhenPatchingInstructionSegmentsThenPatchItWithCTDSize) {
+    WhiteBox<NEO::LinkerInput> linkerInput;
+    linkerInput.traits.requiresPatchingOfInstructionSegments = true;
+    NEO::LinkerInput::RelocationInfo rel;
+    rel.offset = 0x4;
+    rel.type = NEO::LinkerInput::RelocationInfo::Type::PerThreadPayloadOffset;
+    rel.relocationSegment = NEO::SegmentType::Instructions;
+    linkerInput.textRelocations.push_back({rel});
+
+    NEO::Linker::KernelDescriptorsT kernelDescriptors;
+    KernelDescriptor kd;
+    kd.kernelAttributes.crossThreadDataSize = 0x20;
+    kernelDescriptors.push_back(&kd);
+
+    WhiteBox<NEO::Linker> linker(linkerInput);
+
+    uint64_t segmentData{0};
+    NEO::Linker::PatchableSegment segmentToPatch;
+    segmentToPatch.hostPointer = reinterpret_cast<void *>(&segmentData);
+    segmentToPatch.segmentSize = sizeof(segmentData);
+
+    NEO::Linker::UnresolvedExternals unresolvedExternals;
+    linker.patchInstructionsSegments({segmentToPatch}, unresolvedExternals, kernelDescriptors);
+    auto perThreadPayloadOffsetPatchedValue = reinterpret_cast<uint32_t *>(ptrOffset(segmentToPatch.hostPointer, static_cast<size_t>(rel.offset)));
+    EXPECT_EQ(kd.kernelAttributes.crossThreadDataSize, static_cast<uint32_t>(*perThreadPayloadOffsetPatchedValue));
+}