Linker: Fix incremental patching for local memory allocations

Change-Id: Ib85e4a2abc8a62477003853aa0c35f8107444f4e
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-10-07 15:32:03 +02:00
committed by sys_ocldev
parent 2643346b48
commit fb0651521d
24 changed files with 115 additions and 73 deletions

View File

@@ -15,6 +15,7 @@ set(NEO_COMPILER_INTERFACE
${CMAKE_CURRENT_SOURCE_DIR}/default_cache_config.h
${CMAKE_CURRENT_SOURCE_DIR}/intermediate_representations.h
${CMAKE_CURRENT_SOURCE_DIR}/linker.h
${CMAKE_CURRENT_SOURCE_DIR}/linker.inl
${CMAKE_CURRENT_SOURCE_DIR}/linker.cpp
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.h
${CMAKE_CURRENT_SOURCE_DIR}/compiler_options/compiler_options_base.cpp

View File

@@ -5,8 +5,9 @@
*
*/
#include "linker.h"
#include "shared/source/compiler_interface/linker.h"
#include "shared/source/compiler_interface/linker.inl"
#include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/debug_helpers.h"
@@ -269,29 +270,13 @@ void Linker::patchDataSegments(const SegmentInfo &globalVariablesSegInfo, const
? static_cast<uint32_t>(gpuAddressAs64bit & 0xffffffff)
: gpuAddressAs64bit;
bool useBlitter = false;
if (pDevice && initData) {
auto &hwInfo = pDevice->getHardwareInfo();
auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (dst->isAllocatedInLocalMemoryPool() && (helper.isBlitCopyRequiredForLocalMemory(hwInfo) || helper.forceBlitterUseForGlobalBuffers(hwInfo, dst))) {
useBlitter = true;
}
}
if (useBlitter) {
auto initValue = ptrOffset(initData, static_cast<uintptr_t>(relocation.offset));
if (initData) {
if (patchSize == sizeof(uint64_t)) {
uint64_t value = *reinterpret_cast<const uint64_t *>(initValue) + incrementValue;
BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dst, static_cast<size_t>(relocation.offset),
&value, {sizeof(value), 1, 1});
patchIncrement<uint64_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
} else {
uint32_t value = *reinterpret_cast<const uint32_t *>(initValue) + static_cast<uint32_t>(incrementValue);
BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dst, static_cast<size_t>(relocation.offset),
&value, {sizeof(value), 1, 1});
UNRECOVERABLE_IF(patchSize != sizeof(uint32_t));
patchIncrement<uint32_t>(pDevice, dst, static_cast<size_t>(relocation.offset), initData, incrementValue);
}
} else {
auto relocAddress = ptrOffset(dst->getUnderlyingBuffer(), static_cast<uintptr_t>(relocation.offset));
patchIncrement(relocAddress, patchSize, incrementValue);
}
}
}

View File

@@ -204,6 +204,9 @@ struct Linker {
GraphicsAllocation *globalVariablesSeg, GraphicsAllocation *globalConstantsSeg,
std::vector<UnresolvedExternal> &outUnresolvedExternals, Device *pDevice,
const void *constantsInitData, const void *variablesInitData);
template <typename PatchSizeT>
void patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue);
};
std::string constructLinkerErrorMessage(const Linker::UnresolvedExternals &unresolvedExternals, const std::vector<std::string> &instructionsSegmentsNames);

View File

@@ -0,0 +1,42 @@
/*
* Copyright (C) 2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/compiler_interface/linker.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/blit_commands_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
namespace NEO {
// Applies an incremental relocation patch of width sizeof(PatchSizeT).
// The initial value is read from initData at relocationOffset, incrementValue
// (narrowed to PatchSizeT) is added to it, and the sum is written to
// dstAllocation at the same offset.
// The write goes through the blitter when the destination is in the local memory
// pool and the HW helper either requires blit copies for local memory or forces
// blitter use for this allocation; otherwise the memory manager copies it.
// pDevice is dereferenced unconditionally.
// NOTE(review): the results of both write paths are discarded here.
template <typename PatchSizeT>
void Linker::patchIncrement(Device *pDevice, GraphicsAllocation *dstAllocation, size_t relocationOffset, const void *initData, uint64_t incrementValue) {
    auto &hwInfo = pDevice->getHardwareInfo();
    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);

    const bool blitterNeeded = dstAllocation->isAllocatedInLocalMemoryPool() &&
                               (hwHelper.isBlitCopyRequiredForLocalMemory(hwInfo) || hwHelper.forceBlitterUseForGlobalBuffers(hwInfo, dstAllocation));

    // memcpy_s rather than a typed load: the relocation offset is not required to be aligned to PatchSizeT.
    PatchSizeT patchedValue = 0;
    memcpy_s(&patchedValue, sizeof(PatchSizeT), ptrOffset(initData, relocationOffset), sizeof(PatchSizeT));
    patchedValue += static_cast<PatchSizeT>(incrementValue);

    if (!blitterNeeded) {
        pDevice->getMemoryManager()->copyMemoryToAllocation(dstAllocation, relocationOffset, &patchedValue, sizeof(PatchSizeT));
        return;
    }

    BlitHelperFunctions::blitMemoryToAllocation(*pDevice, dstAllocation, relocationOffset, &patchedValue, {sizeof(PatchSizeT), 1, 1});
}
} // namespace NEO

View File

@@ -54,28 +54,16 @@ struct PatchStoreOperation {
}
};
// Patch operation functor: adds `value` to the object that `memory` points to.
struct PatchIncrementOperation {
    template <typename T>
    void operator()(T *memory, T value) {
        T &patched = *memory;
        patched += value;
    }
};
template <typename PatchOperationT = PatchStoreOperation>
inline void patchWithRequiredSize(void *memoryToBePatched, uint32_t patchSize, uint64_t patchValue) {
if (patchSize == sizeof(uint64_t)) {
uint64_t *curbeAddress = reinterpret_cast<uint64_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, patchValue);
PatchStoreOperation{}(curbeAddress, patchValue);
} else {
uint32_t *curbeAddress = reinterpret_cast<uint32_t *>(memoryToBePatched);
PatchOperationT{}(curbeAddress, static_cast<uint32_t>(patchValue));
PatchStoreOperation{}(curbeAddress, static_cast<uint32_t>(patchValue));
}
}
inline void patchIncrement(void *memoryToBePatched, uint32_t patchSize, uint64_t patchIncrementValue) {
patchWithRequiredSize<PatchIncrementOperation>(memoryToBePatched, patchSize, patchIncrementValue);
}
// Converts a pointer to its integer representation, widened to 64 bits.
inline uint64_t castToUint64(const void *address) {
    const auto addressAsInteger = reinterpret_cast<uintptr_t>(address);
    return static_cast<uint64_t>(addressAsInteger);
}

View File

@@ -571,11 +571,11 @@ HeapIndex MemoryManager::selectHeap(const GraphicsAllocation *allocation, bool h
return HeapIndex::HEAP_STANDARD;
}
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
if (!graphicsAllocation->getUnderlyingBuffer()) {
return false;
}
memcpy_s(graphicsAllocation->getUnderlyingBuffer(), graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
memcpy_s(ptrOffset(graphicsAllocation->getUnderlyingBuffer(), destinationOffset), (graphicsAllocation->getUnderlyingBufferSize() - destinationOffset), memoryToCopy, sizeToCopy);
return true;
}

View File

@@ -169,7 +169,7 @@ class MemoryManager {
void unregisterEngineForCsr(CommandStreamReceiver *commandStreamReceiver);
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
void setDefaultEngineIndex(uint32_t index) { defaultEngineIndex = index; }
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy);
virtual bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy);
HeapIndex selectHeap(const GraphicsAllocation *allocation, bool hasPointer, bool isFullRangeSVM, bool useExternalWindow);
static std::unique_ptr<MemoryManager> createMemoryManager(ExecutionEnvironment &executionEnvironment);
virtual void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) { return nullptr; };

View File

@@ -53,7 +53,7 @@ class DrmMemoryManager : public MemoryManager {
}
DrmGemCloseWorker *peekGemCloseWorker() const { return this->gemCloseWorker.get(); }
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
int obtainFdFromHandle(int boHandle, uint32_t rootDeviceindex);
AddressRange reserveGpuAddress(size_t size, uint32_t rootDeviceIndex) override;

View File

@@ -33,8 +33,8 @@ void *DrmMemoryManager::lockResourceInLocalMemoryImpl(BufferObject *bo) {
void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
}
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
uint64_t DrmMemoryManager::getLocalMemorySize(uint32_t rootDeviceIndex) {

View File

@@ -231,9 +231,9 @@ void DrmMemoryManager::unlockResourceInLocalMemoryImpl(BufferObject *bo) {
bo->setLockedAddress(nullptr);
}
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
if (graphicsAllocation->getUnderlyingBuffer()) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
auto drmAllocation = static_cast<DrmAllocation *>(graphicsAllocation);
for (auto handleId = 0u; handleId < graphicsAllocation->storageInfo.getNumBanks(); handleId++) {
@@ -241,7 +241,7 @@ bool DrmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAlloca
if (!ptr) {
return false;
}
memcpy_s(ptr, graphicsAllocation->getUnderlyingBufferSize(), memoryToCopy, sizeToCopy);
memcpy_s(ptrOffset(ptr, destinationOffset), graphicsAllocation->getUnderlyingBufferSize() - destinationOffset, memoryToCopy, sizeToCopy);
this->unlockResourceInLocalMemoryImpl(drmAllocation->getBOs()[handleId]);
}
return true;

View File

@@ -56,7 +56,7 @@ class WddmMemoryManager : public MemoryManager {
AlignedMallocRestrictions *getAlignedMallocRestrictions() override;
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) override;
bool copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) override;
void *reserveCpuAddressRange(size_t size, uint32_t rootDeviceIndex) override;
void releaseReservedCpuAddressRange(void *reserved, size_t size, uint32_t rootDeviceIndex) override;
bool isCpuCopyRequired(const void *ptr) override;

View File

@@ -13,8 +13,8 @@ GraphicsAllocation *WddmMemoryManager::allocateGraphicsMemoryInDevicePool(const
status = AllocationStatus::RetryInNonDevicePool;
return nullptr;
}
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, const void *memoryToCopy, size_t sizeToCopy) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, memoryToCopy, sizeToCopy);
bool WddmMemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocation, size_t destinationOffset, const void *memoryToCopy, size_t sizeToCopy) {
return MemoryManager::copyMemoryToAllocation(graphicsAllocation, destinationOffset, memoryToCopy, sizeToCopy);
}
bool WddmMemoryManager::mapGpuVirtualAddress(WddmAllocation *allocation, const void *requiredPtr) {
if (allocation->getNumGmms() > 1) {

View File

@@ -60,7 +60,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc
if (gpuAllocation->isAllocatedInLocalMemoryPool() && helper.isBlitCopyRequiredForLocalMemory(hwInfo)) {
success = (BlitHelperFunctions::blitMemoryToAllocation(device, gpuAllocation, 0, initData, {size, 1, 1}) == BlitOperationResult::Success);
} else {
success = device.getMemoryManager()->copyMemoryToAllocation(gpuAllocation, initData, static_cast<uint32_t>(size));
success = device.getMemoryManager()->copyMemoryToAllocation(gpuAllocation, 0, initData, static_cast<uint32_t>(size));
}
UNRECOVERABLE_IF(!success);