performance: reuse allocation data if available

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2025-08-08 08:09:52 +00:00
committed by Compute-Runtime-Automation
parent 04f7664242
commit 52e413f69e
5 changed files with 11 additions and 13 deletions

View File

@@ -127,7 +127,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI, NEO::N
virtual ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) = 0;
virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) = 0;
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) = 0;
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) = 0;
virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
uint32_t groupSizeZ) = 0;

View File

@@ -750,7 +750,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
return ZE_RESULT_SUCCESS;
}
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) {
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) {
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
const auto val = argVal;
const int64_t bufferSize = static_cast<int64_t>(allocation->getUnderlyingBufferSize() - (ptrDiff(argVal, allocation->getGpuAddress())));
@@ -767,10 +767,8 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
setBufferSurfaceState(argIndex, reinterpret_cast<void *>(val), allocation);
}
NEO::SvmAllocationData *allocData = nullptr;
if (peerAllocData) {
allocData = peerAllocData;
} else {
if (!allocData) {
allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
}
if (allocData) {
@@ -903,7 +901,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
const uint32_t allocId = allocData->getAllocId();
state.kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, allocId, allocationsCounter, false};
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc, peerAllocData);
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc, peerAllocData ? peerAllocData : allocData);
}
ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) {

View File

@@ -100,7 +100,7 @@ struct KernelImp : Kernel {
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) override;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) override;
ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal);

View File

@@ -95,7 +95,7 @@ struct Mock<::L0::KernelImp> : public WhiteBox<::L0::KernelImp> {
state.kernelRequiresGenerationOfLocalIdsByRuntime = !forceGenerateLocalIdByHw;
}
}
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) override {
return ZE_RESULT_SUCCESS;
}

View File

@@ -243,9 +243,9 @@ TEST(KernelArgTest, givenKernelWhenSetArgUnknownCalledThenSuccessRteurned) {
struct MockKernelWithCallTracking : Mock<::L0::KernelImp> {
using ::L0::KernelImp::state;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) override {
++setArgBufferWithAllocCalled;
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation, peerAllocData);
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation, allocData);
}
size_t setArgBufferWithAllocCalled = 0u;
@@ -2683,8 +2683,8 @@ struct MyMockKernel : public Mock<KernelImp> {
// Mock override: does no surface-state work and only records that it was invoked.
// None of the parameters (argIndex, address, alloc) are read here.
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
setSurfaceStateCalled = true; // call marker; presumably inspected by the tests that use this mock
}
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override {
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation, peerAllocData);
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *allocData) override {
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation, allocData);
}
bool setSurfaceStateCalled = false;
};