fix: add support for __INTEL_PER_THREAD_OFF

Related-To: NEO-10368
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2024-05-21 16:48:50 +00:00
committed by Compute-Runtime-Automation
parent 3a73fcd7c8
commit 04af8bc5b4
3 changed files with 125 additions and 4 deletions

View File

@@ -185,6 +185,7 @@ bool LinkerInput::addRelocation(Elf::Elf<numBits> &elf, const SectionNameToSegme
relocationInfo.symbolName = reloc.symbolName;
relocationInfo.type = static_cast<LinkerInput::RelocationInfo::Type>(reloc.relocType);
relocationInfo.relocationSegment = getSegmentForSection(sectionName);
+relocationInfo.relocationSegmentName = sectionName;
if (SegmentType::instructions == relocationInfo.relocationSegment) {
auto kernelName = sectionName.substr(Zebin::Elf::SectionNames::textPrefix.length());
@@ -326,7 +327,7 @@ LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const Segm
outUnresolvedExternals, pDevice, constantsInitData, constantsInitDataSize, variablesInitData, variablesInitDataSize);
removeLocalSymbolsFromRelocatedSymbols();
resolveImplicitArgs(kernelDescriptors, pDevice);
-resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);
+resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments, kernelDescriptors);
if (initialUnresolvedExternalsCount < outUnresolvedExternals.size()) {
return LinkingStatus::linkedPartially;
@@ -406,6 +407,9 @@ void Linker::patchAddress(void *relocAddress, const uint64_t value, const Linker
case RelocationInfo::Type::addressHigh:
*reinterpret_cast<uint32_t *>(relocAddress) = static_cast<uint32_t>((value >> 32) & 0xffffffff);
break;
+case RelocationInfo::Type::address16:
+*reinterpret_cast<uint16_t *>(relocAddress) = static_cast<uint16_t>(value);
+break;
}
}
@@ -656,7 +660,7 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
}
}
-void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments) {
+void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments, const KernelDescriptorsT &kernelDescriptors) {
auto &productHelper = pDevice->getProductHelper();
auto releaseHelper = pDevice->getReleaseHelper();
@@ -673,6 +677,22 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
}
outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u];
outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u);
+} else if (outUnresolvedExternals[vecIndex].unresolvedRelocation.symbolName == perThreadOff) {
+RelocatedSymbol<SymbolInfo> symbol;
+auto kernelName = outUnresolvedExternals[vecIndex].unresolvedRelocation.relocationSegmentName.substr(Zebin::Elf::SectionNames::textPrefix.length());
+auto kernelDescriptor = std::find_if(kernelDescriptors.begin(), kernelDescriptors.end(), [&kernelName](const KernelDescriptor *obj) { return obj->kernelMetadata.kernelName == kernelName; });
+if (kernelDescriptor != std::end(kernelDescriptors)) {
+uint64_t crossThreadDataSize = (*kernelDescriptor)->kernelAttributes.crossThreadDataSize - (*kernelDescriptor)->kernelAttributes.inlineDataPayloadSize;
+symbol.gpuAddress = crossThreadDataSize;
+auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
+static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
+NEO::Linker::patchAddress(relocAddress, symbol.gpuAddress, outUnresolvedExternals[vecIndex].unresolvedRelocation);
+outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u];
+outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u);
+}
}
}
}

View File

@@ -92,6 +92,7 @@ struct LinkerInput {
addressLow,
addressHigh,
perThreadPayloadOffset,
+address16 = 7,
relocTypeMax
};
@@ -99,6 +100,7 @@ struct LinkerInput {
uint64_t offset = std::numeric_limits<uint64_t>::max();
Type type = Type::unknown;
SegmentType relocationSegment = SegmentType::unknown;
+std::string relocationSegmentName;
int64_t addend = 0U;
};
@@ -189,6 +191,7 @@ struct LinkerInput {
struct Linker {
inline static const std::string subDeviceID = "__SubDeviceID";
+inline static const std::string perThreadOff = "__INTEL_PER_THREAD_OFF";
using RelocationInfo = LinkerInput::RelocationInfo;
@@ -257,7 +260,7 @@ struct Linker {
bool resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptors, std::vector<ExternalFunctionInfo> &externalFunctions);
void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice);
-void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments);
+void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments, const KernelDescriptorsT &kernelDescriptors);
template <typename PatchSizeT>
void patchIncrement(void *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);