performance: apply stateful programming only if needed

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2025-10-20 15:09:57 +00:00
committed by Compute-Runtime-Automation
parent 5df120ddbc
commit 6cdb73ccca

View File

@@ -984,7 +984,11 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
patchWithRequiredSize(patchLocation, argAsPtr.pointerSize, reinterpret_cast<uintptr_t>(svmPtr));
auto &kernelArgInfo = kernelArguments[argIndex];
const bool argWasUncacheable = kernelArgInfo.isStatelessUncacheable;
const bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false;
statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
if (isValidOffset(argAsPtr.bindful) || isValidOffset(argAsPtr.bindless)) {
bool disableL3 = false;
bool forceNonAuxMode = false;
const bool isAuxTranslationKernel = (AuxTranslationDirection::none != auxTranslationDirection);
@@ -1001,10 +1005,6 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
forceNonAuxMode = true;
}
const bool argWasUncacheable = kernelArgInfo.isStatelessUncacheable;
const bool argIsUncacheable = svmAlloc ? svmAlloc->isUncacheable() : false;
statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
void *ptrToPatch = patchBufferOffset(argAsPtr, svmPtr, svmAlloc);
if (isValidOffset(argAsPtr.bindful)) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), argAsPtr.bindful);
@@ -1017,7 +1017,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
}
Buffer::setSurfaceState(&getDevice().getDevice(), surfaceState, forceNonAuxMode, disableL3, allocSize, ptrToPatch, offset, svmAlloc, 0, 0,
areMultipleSubDevicesInContext());
} else if (isValidOffset(argAsPtr.bindless)) {
} else { // bindless
size_t allocSize = 0;
size_t offset = 0;
if (svmAlloc != nullptr) {
@@ -1036,6 +1036,7 @@ cl_int Kernel::setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocatio
areMultipleSubDevicesInContext());
}
}
}
storeKernelArg(argIndex, SVM_ALLOC_OBJ, svmAlloc, svmPtr, sizeof(uintptr_t));
kernelArgInfo.allocId = allocId;