mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
Fix OCL: don't set multi storage resource for system memory buffers
Respect pointer size when patching surfaces in 32-bit applications.
Related-To: NEO-5735
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4b7782d2ab
commit
90cd433766
@@ -127,13 +127,12 @@ inline void patch(const SrcT &src, void *dst, CrossThreadDataOffset dstOffsetByt
|
||||
}
|
||||
}
|
||||
|
||||
void Kernel::patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) {
|
||||
void Kernel::patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg) {
|
||||
if ((nullptr != crossThreadData) && isValidOffset(arg.stateless)) {
|
||||
auto pp = ptrOffset(crossThreadData, arg.stateless);
|
||||
uintptr_t addressToPatch = reinterpret_cast<uintptr_t>(ptrToPatchInCrossThreadData);
|
||||
patchWithRequiredSize(pp, arg.pointerSize, addressToPatch);
|
||||
patchWithRequiredSize(pp, arg.pointerSize, ptrToPatchInCrossThreadData);
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
PatchInfoData patchInfoData(addressToPatch, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize);
|
||||
PatchInfoData patchInfoData(ptrToPatchInCrossThreadData, 0u, PatchInfoAllocationType::KernelArg, reinterpret_cast<uint64_t>(crossThreadData), arg.stateless, PatchInfoAllocationType::IndirectObjectHeap, arg.pointerSize);
|
||||
this->patchInfoDataList.push_back(patchInfoData);
|
||||
}
|
||||
}
|
||||
@@ -227,18 +226,18 @@ cl_int Kernel::initialize() {
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
|
||||
DEBUG_BREAK_IF(program->getConstantSurface(rootDeviceIndex) == nullptr);
|
||||
uintptr_t constMemory = isBuiltIn ? (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
uint64_t constMemory = isBuiltIn ? castToUint64(program->getConstantSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getConstantSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
|
||||
const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalConstantsSurfaceAddress;
|
||||
patchWithImplicitSurface(reinterpret_cast<void *>(constMemory), *program->getConstantSurface(rootDeviceIndex), arg);
|
||||
patchWithImplicitSurface(constMemory, *program->getConstantSurface(rootDeviceIndex), arg);
|
||||
}
|
||||
|
||||
if (isValidOffset(kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress.stateless)) {
|
||||
DEBUG_BREAK_IF(program->getGlobalSurface(rootDeviceIndex) == nullptr);
|
||||
uintptr_t globalMemory = isBuiltIn ? (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer() : (uintptr_t)program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
uint64_t globalMemory = isBuiltIn ? castToUint64(program->getGlobalSurface(rootDeviceIndex)->getUnderlyingBuffer()) : program->getGlobalSurface(rootDeviceIndex)->getGpuAddressToPatch();
|
||||
|
||||
const auto &arg = kernelDescriptor.payloadMappings.implicitArgs.globalVariablesSurfaceAddress;
|
||||
patchWithImplicitSurface(reinterpret_cast<void *>(globalMemory), *program->getGlobalSurface(rootDeviceIndex), arg);
|
||||
patchWithImplicitSurface(globalMemory, *program->getGlobalSurface(rootDeviceIndex), arg);
|
||||
}
|
||||
|
||||
// Patch Surface State Heap
|
||||
@@ -364,7 +363,7 @@ cl_int Kernel::patchPrivateSurface() {
|
||||
}
|
||||
|
||||
const auto &privateMemoryAddress = kernelDescriptor.payloadMappings.implicitArgs.privateMemoryAddress;
|
||||
patchWithImplicitSurface(reinterpret_cast<void *>(privateSurface->getGpuAddressToPatch()), *privateSurface, privateMemoryAddress);
|
||||
patchWithImplicitSurface(privateSurface->getGpuAddressToPatch(), *privateSurface, privateMemoryAddress);
|
||||
}
|
||||
return CL_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -465,7 +465,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
|
||||
|
||||
void *patchBufferOffset(const ArgDescPointer &argAsPtr, void *svmPtr, GraphicsAllocation *svmAlloc);
|
||||
|
||||
void patchWithImplicitSurface(void *ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
|
||||
void patchWithImplicitSurface(uint64_t ptrToPatchInCrossThreadData, GraphicsAllocation &allocation, const ArgDescPointer &arg);
|
||||
|
||||
void provideInitializationHints();
|
||||
|
||||
|
||||
@@ -470,7 +470,7 @@ Buffer *Buffer::create(Context *context,
|
||||
auto allocation = allocationInfo.memory;
|
||||
auto memoryStorage = allocation->getUnderlyingBuffer();
|
||||
if (context->getRootDeviceIndices().size() > 1) {
|
||||
multiGraphicsAllocation.setMultiStorage(true);
|
||||
multiGraphicsAllocation.setMultiStorage(!MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool()));
|
||||
}
|
||||
|
||||
auto pBuffer = createBufferHw(context,
|
||||
@@ -669,7 +669,7 @@ Buffer *Buffer::createSubBuffer(cl_mem_flags flags,
|
||||
uint64_t Buffer::setArgStateless(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex, bool set32BitAddressing) {
|
||||
// Subbuffers have offset that graphicsAllocation is not aware of
|
||||
auto graphicsAllocation = multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex);
|
||||
uintptr_t addressToPatch = ((set32BitAddressing) ? static_cast<uintptr_t>(graphicsAllocation->getGpuAddressToPatch()) : static_cast<uintptr_t>(graphicsAllocation->getGpuAddress())) + this->offset;
|
||||
auto addressToPatch = ((set32BitAddressing) ? graphicsAllocation->getGpuAddressToPatch() : graphicsAllocation->getGpuAddress()) + this->offset;
|
||||
DEBUG_BREAK_IF(!(graphicsAllocation->isLocked() || (addressToPatch != 0) || (graphicsAllocation->getGpuBaseAddress() != 0) ||
|
||||
(this->getCpuAddress() == nullptr && graphicsAllocation->peekSharedHandle())));
|
||||
|
||||
|
||||
@@ -126,7 +126,7 @@ cl_int Pipe::getPipeInfo(cl_image_info paramName,
|
||||
}
|
||||
|
||||
void Pipe::setPipeArg(void *memory, uint32_t patchSize, uint32_t rootDeviceIndex) {
|
||||
patchWithRequiredSize(memory, patchSize, static_cast<uintptr_t>(multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch()));
|
||||
patchWithRequiredSize(memory, patchSize, multiGraphicsAllocation.getGraphicsAllocation(rootDeviceIndex)->getGpuAddressToPatch());
|
||||
}
|
||||
|
||||
Pipe::~Pipe() = default;
|
||||
|
||||
Reference in New Issue
Block a user