mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
Patch global variables and constants once
This change replaces the per-relocation patching of global constants and variables with a mechanism that patches them only once. This improves linking-time performance for kernels with multiple global symbols. Signed-off-by: Luzynski, Sebastian Jozef <sebastian.jozef.luzynski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
257967e2f6
commit
91c69e0fe7
@@ -17,7 +17,6 @@ set(NEO_CORE_COMPILER_INTERFACE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/external_functions.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options.cpp
|
||||
|
||||
@@ -8,13 +8,13 @@
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/compiler_interface/linker.inl"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/device_binary_format/elf/zebin_elf.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/kernel/implicit_args.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
@@ -278,6 +278,32 @@ void LinkerInput::parseRelocationForExtFuncUsage(const RelocationInfo &relocInfo
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the linking pipeline: validates the linker input, processes relocations,
// patches the instruction and data segments, then resolves implicit args,
// builtins and external functions.
//
// Returns:
//   LinkingStatus::Error           - the input is invalid, processRelocations() failed,
//                                    or resolveExternalFunctions() failed;
//   LinkingStatus::LinkedPartially - outUnresolvedExternals grew during this call;
//   LinkingStatus::LinkedFully     - otherwise.
LinkingStatus Linker::link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo,
                           const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
                           const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData,
                           size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors,
                           ExternalFunctionsT &externalFunctions) {
    if (!data.isValid()) {
        return LinkingStatus::Error;
    }

    // Remember how many externals were already unresolved on entry so that only
    // the ones appended by this call decide between partial and full linking.
    const auto unresolvedCountOnEntry = outUnresolvedExternals.size();

    if (!processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments)) {
        return LinkingStatus::Error;
    }

    patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
    patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
                      outUnresolvedExternals, pDevice, constantsInitData, constantsInitDataSize, variablesInitData, variablesInitDataSize);
    resolveImplicitArgs(kernelDescriptors, pDevice);
    resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);

    if (outUnresolvedExternals.size() > unresolvedCountOnEntry) {
        return LinkingStatus::LinkedPartially;
    }

    return resolveExternalFunctions(kernelDescriptors, externalFunctions) ? LinkingStatus::LinkedFully
                                                                          : LinkingStatus::Error;
}
|
||||
|
||||
bool Linker::processRelocations(const SegmentInfo &globalVariables, const SegmentInfo &globalConstants, const SegmentInfo &exportedFunctions, const SegmentInfo &globalStrings,
|
||||
const PatchableSegments &instructionsSegments) {
|
||||
relocatedSymbols.reserve(data.getSymbols().size());
|
||||
@@ -397,7 +423,13 @@ void Linker::patchInstructionsSegments(const std::vector<PatchableSegment> &inst
|
||||
void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
|
||||
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
|
||||
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
|
||||
const void *constantsInitData, const void *variablesInitData) {
|
||||
const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize) {
|
||||
std::vector<uint8_t> constantsInitDataCopy(constantsInitDataSize);
|
||||
memcpy_s(constantsInitDataCopy.data(), constantsInitDataCopy.size(), constantsInitData, constantsInitDataSize);
|
||||
std::vector<uint8_t> variablesInitDataCopy(variablesInitDataSize);
|
||||
memcpy_s(variablesInitDataCopy.data(), variablesInitDataCopy.size(), variablesInitData, variablesInitDataSize);
|
||||
bool isAnySymbolRelocated = false;
|
||||
|
||||
for (const auto &relocation : data.getDataRelocations()) {
|
||||
auto symbolIt = relocatedSymbols.find(relocation.symbolName);
|
||||
if (symbolIt == relocatedSymbols.end()) {
|
||||
@@ -406,14 +438,14 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
|
||||
}
|
||||
uint64_t srcGpuAddressAs64Bit = symbolIt->second.gpuAddress;
|
||||
|
||||
GraphicsAllocation *dst = nullptr;
|
||||
std::vector<uint8_t> *dst = nullptr;
|
||||
const void *initData = nullptr;
|
||||
if (SegmentType::GlobalVariables == relocation.relocationSegment) {
|
||||
dst = globalVariablesSeg;
|
||||
initData = variablesInitData;
|
||||
} else if (SegmentType::GlobalConstants == relocation.relocationSegment) {
|
||||
dst = globalConstantsSeg;
|
||||
if (SegmentType::GlobalConstants == relocation.relocationSegment) {
|
||||
dst = &constantsInitDataCopy;
|
||||
initData = constantsInitData;
|
||||
} else if (SegmentType::GlobalVariables == relocation.relocationSegment) {
|
||||
dst = &variablesInitDataCopy;
|
||||
initData = variablesInitData;
|
||||
} else {
|
||||
outUnresolvedExternals.push_back(UnresolvedExternal{relocation});
|
||||
continue;
|
||||
@@ -421,7 +453,7 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
|
||||
UNRECOVERABLE_IF(nullptr == dst);
|
||||
|
||||
auto relocType = (LinkerInput::Traits::PointerSize::Ptr32bit == data.getTraits().pointerSize) ? RelocationInfo::Type::AddressLow : relocation.type;
|
||||
bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst->getUnderlyingBufferSize();
|
||||
bool invalidOffset = relocation.offset + addressSizeInBytes(relocType) > dst->size();
|
||||
DEBUG_BREAK_IF(invalidOffset);
|
||||
if (invalidOffset) {
|
||||
outUnresolvedExternals.push_back(UnresolvedExternal{relocation});
|
||||
@@ -429,21 +461,35 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
|
||||
}
|
||||
|
||||
uint64_t incrementValue = srcGpuAddressAs64Bit + relocation.addend;
|
||||
isAnySymbolRelocated = true;
|
||||
switch (relocType) {
|
||||
default:
|
||||
UNRECOVERABLE_IF(RelocationInfo::Type::Address != relocType);
|
||||
patchIncrement<uint64_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
patchIncrement<uint64_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
break;
|
||||
case RelocationInfo::Type::AddressLow:
|
||||
incrementValue = incrementValue & 0xffffffff;
|
||||
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
patchIncrement<uint32_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
break;
|
||||
case RelocationInfo::Type::AddressHigh:
|
||||
incrementValue = (incrementValue >> 32) & 0xffffffff;
|
||||
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
patchIncrement<uint32_t>(dst->data(), static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isAnySymbolRelocated) {
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
if (globalConstantsSeg) {
|
||||
bool useBlitter = hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *globalConstantsSeg);
|
||||
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalConstantsSeg, 0, constantsInitDataCopy.data(), constantsInitDataCopy.size());
|
||||
}
|
||||
if (globalVariablesSeg) {
|
||||
bool useBlitter = hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *globalVariablesSeg);
|
||||
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, globalVariablesSeg, 0, variablesInitDataCopy.data(), variablesInitDataCopy.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames) {
|
||||
@@ -583,4 +629,15 @@ void Linker::resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolved
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void Linker::patchIncrement(void *dstBegin, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
|
||||
auto initValue = ptrOffset(initData, relocationOffset);
|
||||
PatchSizeT value = 0;
|
||||
memcpy_s(&value, sizeof(PatchSizeT), initValue, sizeof(PatchSizeT));
|
||||
value += static_cast<PatchSizeT>(incrementValue);
|
||||
|
||||
auto destination = ptrOffset(dstBegin, relocationOffset);
|
||||
memcpy_s(destination, sizeof(PatchSizeT), &value, sizeof(PatchSizeT));
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -21,6 +21,7 @@ namespace NEO {
|
||||
class Device;
|
||||
class GraphicsAllocation;
|
||||
struct KernelDescriptor;
|
||||
struct ProgramInfo;
|
||||
|
||||
enum class SegmentType : uint32_t {
|
||||
Unknown,
|
||||
@@ -219,30 +220,12 @@ struct Linker {
|
||||
: data(data) {
|
||||
}
|
||||
|
||||
// Runs the linking pipeline: validates the linker input, processes relocations,
// patches the instruction and data segments, then resolves implicit args,
// builtins and external functions.
// Returns Error when the input is invalid, processRelocations() fails or
// resolveExternalFunctions() fails; LinkedPartially when outUnresolvedExternals
// grew during this call; LinkedFully otherwise.
LinkingStatus link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo, const SegmentInfo &globalStringsSegInfo,
                   GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg, const PatchableSegments &instructionsSegments,
                   UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData, const void *variablesInitData,
                   const KernelDescriptorsT &kernelDescriptors, ExternalFunctionsT &externalFunctions) {
    if (!data.isValid()) {
        return LinkingStatus::Error;
    }

    // Only externals appended by this call decide between partial and full linking.
    const auto unresolvedCountOnEntry = outUnresolvedExternals.size();

    if (!processRelocations(globalVariablesSegInfo, globalConstantsSegInfo, exportedFunctionsSegInfo, globalStringsSegInfo, instructionsSegments)) {
        return LinkingStatus::Error;
    }

    patchInstructionsSegments(instructionsSegments, outUnresolvedExternals, kernelDescriptors);
    patchDataSegments(globalVariablesSegInfo, globalConstantsSegInfo, globalVariablesSeg, globalConstantsSeg,
                      outUnresolvedExternals, pDevice, constantsInitData, variablesInitData);
    resolveImplicitArgs(kernelDescriptors, pDevice);
    resolveBuiltins(pDevice, outUnresolvedExternals, instructionsSegments);

    if (outUnresolvedExternals.size() > unresolvedCountOnEntry) {
        return LinkingStatus::LinkedPartially;
    }

    return resolveExternalFunctions(kernelDescriptors, externalFunctions) ? LinkingStatus::LinkedFully
                                                                          : LinkingStatus::Error;
}
|
||||
LinkingStatus link(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo, const SegmentInfo &exportedFunctionsSegInfo,
|
||||
const SegmentInfo &globalStringsSegInfo, GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
|
||||
const PatchableSegments &instructionsSegments, UnresolvedExternals &outUnresolvedExternals, Device *pDevice, const void *constantsInitData,
|
||||
size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize, const KernelDescriptorsT &kernelDescriptors,
|
||||
ExternalFunctionsT &externalFunctions);
|
||||
|
||||
static void patchAddress(void *relocAddress, const uint64_t value, const RelocationInfo &relocation);
|
||||
RelocatedSymbolsMap extractRelocatedSymbols() {
|
||||
return RelocatedSymbolsMap(std::move(relocatedSymbols));
|
||||
@@ -265,14 +248,14 @@ struct Linker {
|
||||
void patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const SegmentInfo &globalConstantsSegInfo,
|
||||
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
|
||||
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
|
||||
const void *constantsInitData, const void *variablesInitData);
|
||||
const void *constantsInitData, size_t constantsInitDataSize, const void *variablesInitData, size_t variablesInitDataSize);
|
||||
|
||||
bool resolveExternalFunctions(const KernelDescriptorsT &kernelDescriptors, std::vector<ExternalFunctionInfo> &externalFunctions);
|
||||
void resolveImplicitArgs(const KernelDescriptorsT &kernelDescriptors, Device *pDevice);
|
||||
void resolveBuiltins(Device *pDevice, UnresolvedExternals &outUnresolvedExternals, const std::vector<PatchableSegment> &instructionsSegments);
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
|
||||
void patchIncrement(void *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
|
||||
|
||||
std::unordered_map<uint32_t /*ISA segment id*/, StackVec<uint32_t *, 2> /*implicit args relocation address to patch*/> pImplicitArgsRelocationAddresses;
|
||||
};
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void Linker::patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
|
||||
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
|
||||
bool useBlitter = hwInfoConfig.isBlitCopyRequiredForLocalMemory(hwInfo, *dstAllocation);
|
||||
|
||||
auto initValue = ptrOffset(initData, relocationOffset);
|
||||
|
||||
PatchSizeT value = 0;
|
||||
memcpy_s(&value, sizeof(PatchSizeT), initValue, sizeof(PatchSizeT));
|
||||
value += static_cast<PatchSizeT>(incrementValue);
|
||||
|
||||
MemoryTransferHelper::transferMemoryToAllocation(useBlitter, *pDevice, dstAllocation, relocationOffset, &value, sizeof(PatchSizeT));
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
Reference in New Issue
Block a user