mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
fix: add support for __INTEL_PER_THREAD_OFF
Related-To: NEO-10368
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
committed by Compute-Runtime-Automation
parent 3a73fcd7c8
commit 04af8bc5b4
@@ -185,6 +185,7 @@ bool LinkerInput::addRelocation(Elf::Elf<numBits> &elf, const SectionNameToSegme
|
||||
relocationInfo.symbolName = reloc.symbolName;
|
||||
relocationInfo.type = static_cast<LinkerInput::RelocationInfo::Type>(reloc.relocType);
|
||||
relocationInfo.relocationSegment = getSegmentForSection(sectionName);
|
||||
relocationInfo.relocationSegmentName = sectionName;
|
||||
|
||||
if (SegmentType::instructions == relocationInfo.relocationSegment) {
|
||||
auto kernelName = sectionName.substr(Zebin::Elf::SectionNames::textPrefix.length());
|
||||
@@ -326,7 +327,7 @@ LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const Segm
|
||||
outUnresolvedExternals, pDevice, constantsInitData, constantsInitDataSize, variablesInitData, variablesInitDataSize);
|
||||
removeLocalSymbolsFromRelocatedSymbols();
|
||||
resolveImplicitArgs(kernelDescriptors, pDevice);
|
||||
resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);
|
||||
resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments, kernelDescriptors);
|
||||
|
||||
if (initialUnresolvedExternalsCount < outUnresolvedExternals.size()) {
|
||||
return LinkingStatus::linkedPartially;
|
||||
@@ -406,6 +407,9 @@ void Linker::patchAddress(void *relocAddress, const uint64_t value, const Linker
|
||||
case RelocationInfo::Type::addressHigh:
|
||||
*reinterpret_cast<uint32_t *>(relocAddress) = static_cast<uint32_t>((value >> 32) & 0xffffffff);
|
||||
break;
|
||||
case RelocationInfo::Type::address16:
|
||||
*reinterpret_cast<uint16_t *>(relocAddress) = static_cast<uint16_t>(value);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -656,7 +660,7 @@ void Linker::resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, De
|
||||
}
|
||||
}
|
||||
|
||||
void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments) {
|
||||
void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments, const KernelDescriptorsT &kernelDescriptors) {
|
||||
auto &productHelper = pDevice->getProductHelper();
|
||||
auto releaseHelper = pDevice->getReleaseHelper();
|
||||
|
||||
@@ -673,6 +677,22 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
|
||||
}
|
||||
outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u];
|
||||
outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u);
|
||||
} else if (outUnresolvedExternals[vecIndex].unresolvedRelocation.symbolName == perThreadOff) {
|
||||
RelocatedSymbol<SymbolInfo> symbol;
|
||||
|
||||
auto kernelName = outUnresolvedExternals[vecIndex].unresolvedRelocation.relocationSegmentName.substr(Zebin::Elf::SectionNames::textPrefix.length());
|
||||
|
||||
auto kernelDescriptor = std::find_if(kernelDescriptors.begin(), kernelDescriptors.end(), [&kernelName](const KernelDescriptor *obj) { return obj->kernelMetadata.kernelName == kernelName; });
|
||||
if (kernelDescriptor != std::end(kernelDescriptors)) {
|
||||
uint64_t crossThreadDataSize = (*kernelDescriptor)->kernelAttributes.crossThreadDataSize - (*kernelDescriptor)->kernelAttributes.inlineDataPayloadSize;
|
||||
symbol.gpuAddress = crossThreadDataSize;
|
||||
auto relocAddress = ptrOffset(instructionsSegments[outUnresolvedExternals[vecIndex].instructionsSegmentId].hostPointer,
|
||||
static_cast<uintptr_t>(outUnresolvedExternals[vecIndex].unresolvedRelocation.offset));
|
||||
|
||||
NEO::Linker::patchAddress(relocAddress, symbol.gpuAddress, outUnresolvedExternals[vecIndex].unresolvedRelocation);
|
||||
outUnresolvedExternals[vecIndex] = outUnresolvedExternals[outUnresolvedExternals.size() - 1u];
|
||||
outUnresolvedExternals.resize(outUnresolvedExternals.size() - 1u);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -92,6 +92,7 @@ struct LinkerInput {
|
||||
addressLow,
|
||||
addressHigh,
|
||||
perThreadPayloadOffset,
|
||||
address16 = 7,
|
||||
relocTypeMax
|
||||
};
|
||||
|
||||
@@ -99,6 +100,7 @@ struct LinkerInput {
|
||||
uint64_t offset = std::numeric_limits<uint64_t>::max();
|
||||
Type type = Type::unknown;
|
||||
SegmentType relocationSegment = SegmentType::unknown;
|
||||
std::string relocationSegmentName;
|
||||
int64_t addend = 0U;
|
||||
};
|
||||
|
||||
@@ -189,6 +191,7 @@ struct LinkerInput {
|
||||
|
||||
struct Linker {
|
||||
inline static const std::string subDeviceID = "__SubDeviceID";
|
||||
inline static const std::string perThreadOff = "__INTEL_PER_THREAD_OFF";
|
||||
|
||||
using RelocationInfo = LinkerInput::RelocationInfo;
|
||||
|
||||
@@ -257,7 +260,7 @@ struct Linker {
|
||||
|
||||
bool resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptors, std::vector<ExternalFunctionInfo> &externalFunctions);
|
||||
void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice);
|
||||
void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments);
|
||||
void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments, const KernelDescriptorsT &kernelDescriptors);
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void patchIncrement(void *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
|
||||
|
||||
Reference in New Issue
Block a user