Fix OCL: don't set multi-storage resource for system-memory buffers in multi-root-device contexts

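Respect pointer size when patching surfaces in 32-bit applications: carry the address to patch as uint64_t instead of casting it through uintptr_t / void *, so the patched value is sized by the argument's pointer size rather than by the host pointer width.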

Related-To: NEO-5735
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2023-02-15 11:02:02 +00:00
committed by Compute-Runtime-Automation
parent 4b7782d2ab
commit 90cd433766
14 changed files with 86 additions and 46 deletions

View File

@@ -127,13 +127,12 @@ inline void patch(const SrcT &src, void *dst, CrossThreadDataOffset dstOffsetByt
}
}
void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) {
void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) {
if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) {
auto pp = ptrOffset(crossThreadData, arg.stateless);
uintptr_t addressToPatch = reinterpret_cast<uintptr_t>(ptrToPatchInCrossThreadData);
patchWithRequiredSize(pp, arg.pointerSize, addressToPatch);
patchWithRequiredSize(pp, arg.pointerSize, ptrToPatchInCrossThreadData);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize);
PatchInfoData patchInfoData(ptrToPatchInCrossThreadData, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize);
this->patchInfoDataList.push_back(patchInfoData);
}
}
@@ -227,18 +226,18 @@ cl_int Kernel::initialize() {
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr);
uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();
uint64_t constMemory = isBuiltIn ? castToUint64(program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();
const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress;
patchWithImplicitSurface(reinterpret_cast<void *>(constMemory), *program->getConstantSurface(rootDeviceIndex), arg);
patchWithImplicitSurface(constMemory, *program->getConstantSurface(rootDeviceIndex), arg);
}
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr);
uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();
uint64_t globalMemory = isBuiltIn ? castToUint64(program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();
const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress;
patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), arg);
patchWithImplicitSurface(globalMemory, *program->getGlobalSurface(rootDeviceIndex), arg);
}
// Patch Surface State Heap
@@ -364,7 +363,7 @@ cl_int Kernel::patchPrivateSurface() {
}
const auto &privateMemoryAddress = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress;
patchWithImplicitSurface(reinterpret_cast<void *>(privateSurface->getGpuAddressToPatch()), *privateSurface, privateMemoryAddress);
patchWithImplicitSurface(privateSurface->getGpuAddressToPatch(), *privateSurface, privateMemoryAddress);
}
return CL_SUCCESS;
}

View File

@@ -465,7 +465,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc);
void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
void patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
void provideInitializationHints();

View File

@@ -470,7 +470,7 @@ Buffer *Buffer::create(Context *context,
auto allocation = allocationInfo.memory;
auto memoryStorage = allocation->getUnderlyingBuffer();
if (context->getRootDeviceIndices().size() > 1) {
multiGraphicsAllocation.setMultiStorage(true);
multiGraphicsAllocation.setMultiStorage(!MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool()));
}
auto pBuffer = createBufferHw(context,
@@ -669,7 +669,7 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) {
// Subbuffers have offset that graphicsAllocation is not aware of
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
auto addressToPatch = ((set32BitAddressing) ? graphicsAllocation->getGpuAddressToPatch() : graphicsAllocation->getGpuAddress()) + this->offset;
DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
(this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle())));

View File

@@ -126,7 +126,7 @@ cl_int Pipe::getPipeInfo(cl_image_info paramName,
}
void Pipe::setPipeArg(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex) {
patchWithRequiredSize(memory, patchSize, static_cast<uintptr_t>(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch()));
patchWithRequiredSize(memory, patchSize, multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch());
}
Pipe::~Pipe() = default;