Patch global variables and constants once

This change replaces the mechanism of patching global constants and
variables in a kernel once per relocation with patching them only once.
This improves link-time performance for kernels with multiple global
symbols.

Signed-off-by: Luzynski, Sebastian Jozef <sebastian.jozef.luzynski@intel.com>
This commit is contained in:
Luzynski, Sebastian Jozef
2022-10-27 15:09:41 +00:00
committed by Compute-Runtime-Automation
parent 257967e2f6
commit 91c69e0fe7
11 changed files with 161 additions and 119 deletions

View File

@@ -17,7 +17,6 @@ set(NEO_CORE_COMPILER_INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/external_functions.h
${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h
${CMAKE_CURRENT_SOURCE_DIR}/linker.h
${CMAKE_CURRENT_SOURCE_DIR}/linker.inl
${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.h
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.cpp

View File

@@ -8,13 +8,13 @@
#include "shared/source/compiler_interface/linker.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/compiler_interface/linker.inl"
#include "shared/source/device/device.h"
#include "shared/source/device_binary_format/elf/zebin_elf.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/debug_helpers.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/graphics_allocation.h"
@@ -278,6 +278,32 @@ void LinkerInput::parseRelocationForExtFuncUsage(const RelocationInfo &relocInfo
}
}
// Drives the linking sequence: validates the linker input, processes relocations,
// patches instruction and data segments, resolves implicit args and builtins, and
// finally resolves external functions.
//
// Returns:
//   LinkingStatus::Error           - input data is invalid, relocation processing failed,
//                                    or external functions could not be resolved
//   LinkingStatus::LinkedPartially - outUnresolvedExternals grew during this call
//   LinkingStatus::LinkedFully     - otherwise
LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo,
                           const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
                           const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData,
                           size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors,
                           ExternalFunctionsT &externalFunctions) {
    // Relocations are not processed at all when the input is already known to be invalid.
    if (!data.isValid()) {
        return LinkingStatus::Error;
    }

    // Remember how many unresolved externals the caller handed in, so that only
    // entries added by this call influence the returned status.
    const auto unresolvedCountOnEntry = outUnresolvedExternals.size();

    if (!processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments)) {
        return LinkingStatus::Error;
    }

    patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
    patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
                      outUnresolvedExternals, pDevice, constantsInitData, constantsInitDataSize, variablesInitData, variablesInitDataSize);
    resolveImplicitArgs(kernelDescriptors, pDevice);
    resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);

    const bool newUnresolvedExternalsAdded = outUnresolvedExternals.size() > unresolvedCountOnEntry;
    if (newUnresolvedExternalsAdded) {
        return LinkingStatus::LinkedPartially;
    }

    // External functions are resolved only when nothing was left unresolved above.
    return resolveExternalFunctions(kernelDescriptors, externalFunctions) ? LinkingStatus::LinkedFully
                                                                          : LinkingStatus::Error;
}
bool Linker::processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings,
const PatchableSegments &instructionsSegments) {
relocatedSymbols.reserve(data.getSymbols().size());
@@ -397,7 +423,13 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
const void *constantsInitData, const void *variablesInitData) {
const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize) {
std::vector<uint8_t> constantsInitDataCopy(constantsInitDataSize);
memcpy_s(constantsInitDataCopy.data(), constantsInitDataCopy.size(), constantsInitData, constantsInitDataSize);
std::vector<uint8_t> variablesInitDataCopy(variablesInitDataSize);
memcpy_s(variablesInitDataCopy.data(), variablesInitDataCopy.size(), variablesInitData, variablesInitDataSize);
bool isAnySymbolRelocated = false;
for (const auto &relocation : data.getDataRelocations()) {
auto symbolIt = relocatedSymbols.find(relocation.symbolName);
if (symbolIt == relocatedSymbols.end()) {
@@ -406,14 +438,14 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
}
uint64_t srcGpuAddressAs64Bit = symbolIt->second.gpuAddress;
GraphicsAllocation *dst = nullptr;
std::vector<uint8_t> *dst = nullptr;
const void *initData = nullptr;
if (SegmentType::GlobalVariables == relocation.relocationSegment) {
dst = globalVariablesSeg;
initData = variablesInitData;
} else if (SegmentType::GlobalConstants == relocation.relocationSegment) {
dst = globalConstantsSeg;
if (SegmentType::GlobalConstants == relocation.relocationSegment) {
dst = &constantsInitDataCopy;
initData = constantsInitData;
} else if (SegmentType::GlobalVariables == relocation.relocationSegment) {
dst = &variablesInitDataCopy;
initData = variablesInitData;
} else {
outUnresolvedExternals.push_back(UnresolvedExternal{relocation});
continue;
@@ -421,7 +453,7 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
UNRECOVERABLE_IF(nullptr == dst);
auto relocType = (LinkerInput::Traits::PointerSize::Ptr32bit == data.getTraits().pointerSize) ? RelocationInfo::Type::AddressLow : relocation.type;
bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst->getUnderlyingBufferSize();
bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst->size();
DEBUG_BREAK_IF(invalidOffset);
if (invalidOffset) {
outUnresolvedExternals.push_back(UnresolvedExternal{relocation});
@@ -429,21 +461,35 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
}
uint64_t incrementValue = srcGpuAddressAs64Bit + relocation.addend;
isAnySymbolRelocated = true;
switch (relocType) {
default:
UNRECOVERABLE_IF(RelocationInfo::Type::Address != relocType);
patchIncrement<uint64_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
patchIncrement<uint64_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
break;
case RelocationInfo::Type::AddressLow:
incrementValue = incrementValue & 0xffffffff;
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
patchIncrement<uint32_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
break;
case RelocationInfo::Type::AddressHigh:
incrementValue = (incrementValue >> 32) & 0xffffffff;
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
patchIncrement<uint32_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
break;
}
}
if (isAnySymbolRelocated) {
auto &hwInfo = pDevice->getHardwareInfo();
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
if (globalConstantsSeg) {
bool useBlitter = hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *globalConstantsSeg);
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg, 0, constantsInitDataCopy.data(), constantsInitDataCopy.size());
}
if (globalVariablesSeg) {
bool useBlitter = hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *globalVariablesSeg);
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg, 0, variablesInitDataCopy.data(), variablesInitDataCopy.size());
}
}
}
std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames) {
@@ -583,4 +629,15 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
}
}
}
// Reads a PatchSizeT-sized value from initData at relocationOffset, adds incrementValue
// (converted to PatchSizeT) to it and writes the sum at the same offset inside dstBegin.
// memcpy_s is used for both the read and the write instead of dereferencing typed pointers.
template <typename PatchSizeT>
void Linker::patchIncrement(void *dstBegin, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
    PatchSizeT patchedValue = 0;

    // Load the initial value stored at the relocation offset.
    auto source = ptrOffset(initData, relocationOffset);
    memcpy_s(&patchedValue, sizeof(patchedValue), source, sizeof(patchedValue));

    patchedValue += static_cast<PatchSizeT>(incrementValue);

    // Store the incremented value at the matching offset of the destination buffer.
    auto target = ptrOffset(dstBegin, relocationOffset);
    memcpy_s(target, sizeof(patchedValue), &patchedValue, sizeof(patchedValue));
}
} // namespace NEO

View File

@@ -21,6 +21,7 @@ namespace NEO {
class Device;
class GraphicsAllocation;
struct KernelDescriptor;
struct ProgramInfo;
enum class SegmentType : uint32_t {
Unknown,
@@ -219,30 +220,12 @@ struct Linker {
: data(data) {
}
// Runs the linking steps in order: relocation processing, instruction and data segment
// patching, implicit args and builtins resolution, then external functions resolution.
// Yields Error when the input is invalid or a mandatory step fails, LinkedPartially when
// outUnresolvedExternals grew during the call, and LinkedFully otherwise.
LinkingStatus link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo, const SegmentInfo &globalStringsSegInfo,
                   GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, const PatchableSegments &instructionsSegments,
                   UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, const void *variablesInitData,
                   const KernelDescriptorsT &kernelDescriptors, ExternalFunctionsT &externalFunctions) {
    // Invalid input short-circuits before any relocation is processed.
    if (!data.isValid()) {
        return LinkingStatus::Error;
    }

    // Baseline used to detect unresolved externals added by this call only.
    const auto unresolvedBefore = outUnresolvedExternals.size();

    const bool relocationsProcessed = processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments);
    if (!relocationsProcessed) {
        return LinkingStatus::Error;
    }

    patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
    patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
                      outUnresolvedExternals, pDevice, constantsInitData, variablesInitData);
    resolveImplicitArgs(kernelDescriptors, pDevice);
    resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);

    if (outUnresolvedExternals.size() > unresolvedBefore) {
        return LinkingStatus::LinkedPartially;
    }

    if (resolveExternalFunctions(kernelDescriptors, externalFunctions)) {
        return LinkingStatus::LinkedFully;
    }
    return LinkingStatus::Error;
}
// Entry point of the linker; declared here only, with no inline body.
// Takes segment descriptions, the global variables/constants allocations, the instruction
// segments to patch, the initialization data pointers together with their sizes, kernel
// descriptors and external functions, and reports the outcome as a LinkingStatus.
// outUnresolvedExternals is passed by non-const reference - presumably it collects entries
// the linker could not resolve (confirm against the definition).
// NOTE(review): constantsInitDataSize / variablesInitDataSize are assumed to be byte sizes of
// the buffers pointed to by constantsInitData / variablesInitData - confirm against callers.
LinkingStatus link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo,
const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData,
size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors,
ExternalFunctionsT &externalFunctions);
static void patchAddress(void *relocAddress, const uint64_t value, const RelocationInfo &relocation);
RelocatedSymbolsMap extractRelocatedSymbols() {
return RelocatedSymbolsMap(std::move(relocatedSymbols));
@@ -265,14 +248,14 @@ struct Linker {
void patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
const void *constantsInitData, const void *variablesInitData);
const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize);
bool resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptors, std::vector<ExternalFunctionInfo> &externalFunctions);
void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice);
void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments);
template <typename PatchSizeT>
void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
void patchIncrement(void *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
std::unordered_map<uint32_t /*ISA segment id*/, StackVec<uint32_t *, 2> /*implicit args relocation address to patch*/> pImplicitArgsRelocationAddresses;
};

View File

@@ -1,36 +0,0 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/compiler_interface/linker.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
namespace NEO {

// Reads a PatchSizeT-sized value from initData at relocationOffset, adds incrementValue
// (converted to PatchSizeT) and transfers the result into dstAllocation at the same offset
// through MemoryTransferHelper. Whether the blitter is used for that transfer is decided by
// HwInfoConfig::isBlitCopyRequiredForLocalMemory for the device's hardware info.
template <typename PatchSizeT>
void Linker::patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
    // Decide on the transfer path for this particular allocation.
    auto &hardwareInfo = pDevice->getHardwareInfo();
    auto &productConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily);
    bool blitRequired = productConfig.isBlitCopyRequiredForLocalMemory(hardwareInfo, *dstAllocation);

    // Compute the patched value on the host from the initial data.
    PatchSizeT patchedValue = 0;
    memcpy_s(&patchedValue, sizeof(PatchSizeT), ptrOffset(initData, relocationOffset), sizeof(PatchSizeT));
    patchedValue += static_cast<PatchSizeT>(incrementValue);

    // Write only the patched bytes into the allocation.
    MemoryTransferHelper::transferMemoryToAllocation(blitRequired, *pDevice, dstAllocation, relocationOffset, &patchedValue, sizeof(PatchSizeT));
}

} // namespace NEO