mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Linker: Fix incremental patching for local memory allocations
Change-Id: Ib85e4a2abc8a62477003853aa0c35f8107444f4e Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
2643346b48
commit
fb0651521d
@@ -15,6 +15,7 @@ set(NEO_COMPILER_INTERFACE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.cpp
|
||||
|
||||
@@ -5,8 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "linker.h"
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
|
||||
#include "shared/source/compiler_interface/linker.inl"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
@@ -269,29 +270,13 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
|
||||
? static_cast<uint32_t>(gpuAddressAs64bit & 0xffffffff)
|
||||
: gpuAddressAs64bit;
|
||||
|
||||
bool useBlitter = false;
|
||||
if (pDevice && initData) {
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (dst->isAllocatedInLocalMemoryPool() && (helper.isBlitCopyRequiredForLocalMemory(hwInfo) || helper.forceBlitterUseForGlobalBuffers(hwInfo, dst))) {
|
||||
useBlitter = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (useBlitter) {
|
||||
auto initValue = ptrOffset(initData, static_cast<uintptr_t>(relocation.offset));
|
||||
if (initData) {
|
||||
if (patchSize == sizeof(uint64_t)) {
|
||||
uint64_t value = *reinterpret_cast<const uint64_t *>(initValue) + incrementValue;
|
||||
BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dst, static_cast<size_t>(relocation.offset),
|
||||
&value, {sizeof(value), 1, 1});
|
||||
patchIncrement<uint64_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
} else {
|
||||
uint32_t value = *reinterpret_cast<const uint32_t *>(initValue) + static_cast<uint32_t>(incrementValue);
|
||||
BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dst, static_cast<size_t>(relocation.offset),
|
||||
&value, {sizeof(value), 1, 1});
|
||||
UNRECOVERABLE_IF(patchSize != sizeof(uint32_t));
|
||||
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
|
||||
}
|
||||
} else {
|
||||
auto relocAddress = ptrOffset(dst->getUnderlyingBuffer(), static_cast<uintptr_t>(relocation.offset));
|
||||
patchIncrement(relocAddress, patchSize, incrementValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,6 +204,9 @@ struct Linker {
|
||||
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
|
||||
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
|
||||
const void *constantsInitData, const void *variablesInitData);
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
|
||||
};
|
||||
|
||||
std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames);
|
||||
|
||||
42
shared/source/compiler_interface/linker.inl
Normal file
42
shared/source/compiler_interface/linker.inl
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <typename PatchSizeT>
|
||||
void Linker::patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
|
||||
bool useBlitter = false;
|
||||
|
||||
auto &hwInfo = pDevice->getHardwareInfo();
|
||||
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (dstAllocation->isAllocatedInLocalMemoryPool() && (helper.isBlitCopyRequiredForLocalMemory(hwInfo) || helper.forceBlitterUseForGlobalBuffers(hwInfo, dstAllocation))) {
|
||||
useBlitter = true;
|
||||
}
|
||||
|
||||
auto initValue = ptrOffset(initData, relocationOffset);
|
||||
|
||||
PatchSizeT value = 0;
|
||||
memcpy_s(&value, sizeof(PatchSizeT), initValue, sizeof(PatchSizeT));
|
||||
value += static_cast<PatchSizeT>(incrementValue);
|
||||
|
||||
if (useBlitter) {
|
||||
BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dstAllocation, relocationOffset, &value, {sizeof(PatchSizeT), 1, 1});
|
||||
} else {
|
||||
pDevice->getMemoryManager()->copyMemoryToAllocation(dstAllocation, relocationOffset, &value, sizeof(PatchSizeT));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
@@ -54,28 +54,16 @@ struct PatchStoreOperation {
|
||||
}
|
||||
};
|
||||
|
||||
// Patch functor that accumulates into the destination instead of overwriting it.
struct PatchIncrementOperation {
    // Adds `value` to the object `memory` points at; `memory` is dereferenced
    // unconditionally.
    template <typename T>
    void operator()(T *memory, T value) {
        T &target = *memory;
        target += value;
    }
};
|
||||
|
||||
// Patches the memory at memoryToBePatched with patchValue, choosing the access
// width from patchSize: a uint64_t access when patchSize == sizeof(uint64_t),
// otherwise a uint32_t access with patchValue narrowed via static_cast.
// NOTE(review): each branch below shows two consecutive calls on the same
// address, one through PatchOperationT{} and one through PatchStoreOperation{}.
// This text looks like a rendered diff with the removed and the added line side
// by side - confirm which of the two is in the actual header.
template <typename PatchOperationT = PatchStoreOperation>
|
||||
inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) {
|
||||
if (patchSize == sizeof(uint64_t)) {
|
||||
uint64_t *curbeAddress = reinterpret_cast<uint64_t *>(memoryToBePatched);
|
||||
PatchOperationT{}(curbeAddress, patchValue);
|
||||
PatchStoreOperation{}(curbeAddress, patchValue);
|
||||
} else {
|
||||
uint32_t *curbeAddress = reinterpret_cast<uint32_t *>(memoryToBePatched);
|
||||
PatchOperationT{}(curbeAddress, static_cast<uint32_t>(patchValue));
|
||||
PatchStoreOperation{}(curbeAddress, static_cast<uint32_t>(patchValue));
|
||||
}
|
||||
}
|
||||
|
||||
inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) {
|
||||
patchWithRequiredSize<PatchIncrementOperation>(memoryToBePatched, patchSize, patchIncrementValue);
|
||||
}
|
||||
|
||||
// Returns the numeric value of `address` widened to 64 bits.
inline uint64_t castToUint64(const void *address) {
    // Pointer -> uintptr_t first, then widen; the pointee is never touched.
    const auto addressBits = reinterpret_cast<uintptr_t>(address);
    return static_cast<uint64_t>(addressBits);
}
|
||||
|
||||
@@ -571,11 +571,11 @@ HeapIndex MemoryManager::selectHeap(const GraphicsAllocation *allocation, bool h
|
||||
return HeapIndex::HEAP_STANDARD;
|
||||
}
|
||||
|
||||
// Copies sizeToCopy bytes from memoryToCopy into the allocation's underlying
// (CPU-visible) buffer using memcpy_s.
// Returns false when getUnderlyingBuffer() is null, true otherwise; the result
// of memcpy_s itself is not inspected.
// NOTE(review): two signatures and two memcpy_s lines appear back to back (with
// and without destinationOffset). This looks like a rendered diff with removed
// and added lines side by side - confirm which pair is in the actual source.
// NOTE(review): in the destinationOffset variant the destination size is
// computed as getUnderlyingBufferSize() - destinationOffset with no prior range
// check visible here; presumably callers keep the offset inside the buffer -
// verify, since the unsigned subtraction would wrap otherwise.
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
if (!graphicsAllocation->getUnderlyingBuffer()) {
|
||||
return false;
|
||||
}
|
||||
memcpy_s(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
|
||||
memcpy_s(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), destinationOffset), (graphicsAllocation->getUnderlyingBufferSize() - destinationOffset), memoryToCopy, sizeToCopy);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -169,7 +169,7 @@ class MemoryManager {
|
||||
void unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
|
||||
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
|
||||
void setDefaultEngineIndex(uint32_t index) { defaultEngineIndex = index; }
|
||||
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy);
|
||||
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy);
|
||||
HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM, bool useExternalWindow);
|
||||
static std::unique_ptr<MemoryManager> createMemoryManager(ExecutionEnvironment &executionEnvironment);
|
||||
virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; };
|
||||
|
||||
@@ -53,7 +53,7 @@ class DrmMemoryManager : public MemoryManager {
|
||||
}
|
||||
|
||||
DrmGemCloseWorker *peekGemCloseWorker() const { return this->gemCloseWorker.get(); }
|
||||
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
|
||||
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
|
||||
|
||||
int obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex);
|
||||
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override;
|
||||
|
||||
@@ -33,8 +33,8 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) {
|
||||
// Intentionally empty in this translation unit: the buffer object passed in is
// not touched.
void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
|
||||
}
|
||||
|
||||
// Forwards the copy request to MemoryManager::copyMemoryToAllocation and
// returns its result unchanged.
// NOTE(review): two signature/return pairs appear back to back (with and
// without destinationOffset). This looks like a rendered diff with removed and
// added lines side by side - confirm which pair is in the actual source.
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
|
||||
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
|
||||
}
|
||||
|
||||
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {
|
||||
|
||||
@@ -231,9 +231,9 @@ void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
|
||||
bo->setLockedAddress(nullptr);
|
||||
}
|
||||
|
||||
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
if (graphicsAllocation->getUnderlyingBuffer()) {
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
|
||||
}
|
||||
auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
|
||||
for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
|
||||
@@ -241,7 +241,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
|
||||
if (!ptr) {
|
||||
return false;
|
||||
}
|
||||
memcpy_s(ptr, graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
|
||||
memcpy_s(ptrOffset(ptr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy);
|
||||
this->unlockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -56,7 +56,7 @@ class WddmMemoryManager : public MemoryManager {
|
||||
|
||||
AlignedMallocRestrictions *getAlignedMallocRestrictions() override;
|
||||
|
||||
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
|
||||
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
|
||||
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
|
||||
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
|
||||
bool isCpuCopyRequired(const void *ptr) override;
|
||||
|
||||
@@ -13,8 +13,8 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
|
||||
status = AllocationStatus::RetryInNonDevicePool;
|
||||
return nullptr;
|
||||
}
|
||||
// Forwards the copy request to MemoryManager::copyMemoryToAllocation and
// returns its result unchanged.
// NOTE(review): two signature/return pairs appear back to back (with and
// without destinationOffset). This looks like a rendered diff with removed and
// added lines side by side - confirm which pair is in the actual source.
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
|
||||
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
|
||||
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
|
||||
}
|
||||
bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const void *requiredPtr) {
|
||||
if (allocation->getNumGmms() > 1) {
|
||||
|
||||
@@ -60,7 +60,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc
|
||||
if (gpuAllocation->isAllocatedInLocalMemoryPool() && helper.isBlitCopyRequiredForLocalMemory(hwInfo)) {
|
||||
success = (BlitHelperFunctions::blitMemoryToAllocation(device, gpuAllocation, 0, initData, {size, 1, 1}) == BlitOperationResult::Success);
|
||||
} else {
|
||||
success = device.getMemoryManager()->copyMemoryToAllocation(gpuAllocation, initData, static_cast<uint32_t>(size));
|
||||
success = device.getMemoryManager()->copyMemoryToAllocation(gpuAllocation, 0, initData, static_cast<uint32_t>(size));
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(!success);
|
||||
|
||||
Reference in New Issue
Block a user