// Patch summary (unified git diff over three files; the hunks show only fragments of the touched
// functions, so everything below describes the visible lines only).
//
// kernel_imp.h
//   - Adds struct KernelArgInfo { value, allocId, allocIdMemoryManagerCounter } and a
//     KernelImp::kernelArgInfos vector member.
// kernel_imp.cpp
//   - KernelImp::initialize() resizes kernelArgInfos to kernelArgHandlers.size() with
//     value-initialized entries.
//   - KernelImp::setArgBuffer() gets an early return placed ahead of the existing argument handling.
//     It returns ZE_RESULT_SUCCESS without reaching setArgBufferWithAlloc() when all of these hold:
//       * argVal is non-null, the cached allocId is > 0 and argVal equals the cached value;
//       * the value loaded from svmAllocsManager->allocationsCounter is > 0;
//       * either that counter equals the cached counter, or getSVMAlloc(*argVal) returns data whose
//         getAllocId() equals the cached allocId (the cached counter is then refreshed).
//   - On the path that reaches the final setArgBufferWithAlloc() call, the entry is overwritten with
//     {argVal, allocId, current counter}; allocId is 0 when no SVM allocation data was found. An
//     allocData obtained during the cache check is reused instead of calling getSVMAlloc() again.
// test_kernel.cpp
//   - MockKernelWithCallTracking counts setArgBufferWithAlloc() calls; SetKernelArgCacheTest walks
//     through the hit, miss and ZE_RESULT_ERROR_INVALID_ARGUMENT cases.
//
// NOTE(review): this text looks extraction-damaged - line breaks are collapsed and template argument
// lists seem to be missing (e.g. "static_cast(", "reinterpret_cast(", "std::vector kernelArgInfos",
// "Test;"). Restore them from the upstream change before applying; they are not guessed here.
//
// NOTE(review): the cache compares argVal (the address the argument is read from) while the
// allocation is resolved from *argVal. If a caller changes the pointer stored at that same address
// and the allocations counter has not moved, the early return appears to keep the previous binding.
// The added test passes a separate variable for each allocation, so it does not exercise this -
// confirm, and consider caching the dereferenced address instead.
//
// NOTE(review): the visible early return after patching nullBufferValue does not update
// kernelArgInfos[argIndex]; whether a stale entry can produce a false hit afterwards cannot be
// determined from these hunks - verify against the full function.
//
// NOTE(review): kernelArgInfos[argIndex] is written before setArgBufferWithAlloc() returns, so a
// failing call would still leave a cache entry - confirm this is acceptable.
diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index a636a8df0f..75a20128cf 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -546,6 +546,33 @@ ze_result_t KernelImp::setArgUnknown(uint32_t argIndex, size_t argSize, const vo } ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const void *argVal) { + const auto device = static_cast(this->module->getDevice()); + const auto driverHandle = static_cast(device->getDriverHandle()); + const auto svmAllocsManager = driverHandle->getSvmAllocsManager(); + const auto allocationsCounter = svmAllocsManager->allocationsCounter.load(); + NEO::SvmAllocationData *allocData = nullptr; + if (argVal != nullptr) { + const auto &argInfo = this->kernelArgInfos[argIndex]; + if (argInfo.allocId > 0 && argVal == argInfo.value) { + bool reuseFromCache = false; + if (allocationsCounter > 0) { + if (allocationsCounter == argInfo.allocIdMemoryManagerCounter) { + reuseFromCache = true; + } else { + const auto requestedAddress = *reinterpret_cast(argVal); + allocData = svmAllocsManager->getSVMAlloc(requestedAddress); + if (allocData && allocData->getAllocId() == argInfo.allocId) { + reuseFromCache = true; + this->kernelArgInfos[argIndex].allocIdMemoryManagerCounter = allocationsCounter; + } + } + if (reuseFromCache) { + return ZE_RESULT_SUCCESS; + } + } + } + } + const auto &allArgs = kernelImmData->getDescriptor().payloadMappings.explicitArgs; const auto &currArg = allArgs[argIndex]; if (currArg.getTraits().getAddressQualifier() == NEO::KernelArgMetadata::AddrLocal) { @@ -578,16 +605,16 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi NEO::patchPointer(ArrayRef(crossThreadData.get(), crossThreadDataSize), arg, nullBufferValue); return ZE_RESULT_SUCCESS; } - - auto requestedAddress = *reinterpret_cast(argVal); + const auto requestedAddress = *reinterpret_cast(argVal); 
uintptr_t gpuAddress = 0u; - NEO::GraphicsAllocation *alloc = module->getDevice()->getDriverHandle()->getDriverSystemMemoryAllocation(requestedAddress, - 1u, - module->getDevice()->getRootDeviceIndex(), - &gpuAddress); - DeviceImp *device = static_cast(this->module->getDevice()); - DriverHandleImp *driverHandle = static_cast(device->getDriverHandle()); - auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(requestedAddress); + NEO::GraphicsAllocation *alloc = driverHandle->getDriverSystemMemoryAllocation(requestedAddress, + 1u, + device->getRootDeviceIndex(), + &gpuAddress); + + if (allocData == nullptr) { + allocData = svmAllocsManager->getSVMAlloc(requestedAddress); + } if (driverHandle->isRemoteResourceNeeded(requestedAddress, alloc, allocData, device)) { if (allocData == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; @@ -603,6 +630,9 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi gpuAddress += offset; } + const uint32_t allocId = allocData ? 
allocData->getAllocId() : 0u; + kernelArgInfos[argIndex] = KernelArgInfo{argVal, allocId, allocationsCounter}; + return setArgBufferWithAlloc(argIndex, gpuAddress, alloc); } @@ -786,7 +816,7 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) { } slmArgSizes.resize(this->kernelArgHandlers.size(), 0); - + kernelArgInfos.resize(this->kernelArgHandlers.size(), {}); isArgUncached.resize(this->kernelArgHandlers.size(), 0); if (kernelImmData->getSurfaceStateHeapSize() > 0) { diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 95c117f0c5..7b92baea93 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -17,6 +17,12 @@ namespace L0 { +struct KernelArgInfo { + const void *value; + uint32_t allocId; + uint32_t allocIdMemoryManagerCounter; +}; + struct KernelImp : Kernel { KernelImp(Module *module); @@ -172,6 +178,7 @@ struct KernelImp : Kernel { Module *module = nullptr; typedef ze_result_t (KernelImp::*KernelArgHandler)(uint32_t argIndex, size_t argSize, const void *argVal); + std::vector kernelArgInfos; std::vector kernelArgHandlers; std::vector residencyContainer; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 582130f79f..dd49679ede 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -67,6 +67,70 @@ TEST(KernelArgTest, givenKernelWhenSetArgUnknownCalledThenSuccessRteurned) { EXPECT_EQ(mockKernel.setArgUnknown(0, 0, nullptr), ZE_RESULT_SUCCESS); } +struct MockKernelWithCallTracking : Mock<::L0::Kernel> { + using ::L0::KernelImp::kernelArgInfos; + + ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override { + ++setArgBufferWithAllocCalled; + return KernelImp::setArgBufferWithAlloc(argIndex, argVal, 
allocation); + } + size_t setArgBufferWithAllocCalled = 0u; +}; + +using SetKernelArgCacheTest = Test; + +TEST_F(SetKernelArgCacheTest, givenValidBufferArgumentWhenSetMultipleTimesThenSetArgBufferWithAllocOnlyCalledIfNeeded) { + MockKernelWithCallTracking mockKernel; + mockKernel.module = module.get(); + ze_kernel_desc_t desc = {}; + desc.pKernelName = kernelName.c_str(); + mockKernel.initialize(&desc); + + auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); + auto allocationProperties = NEO::SVMAllocsManager::SvmAllocationProperties{}; + auto svmAllocation = svmAllocsManager->createSVMAlloc(4096, allocationProperties, context->rootDeviceIndices, context->deviceBitfields); + + size_t callCounter = 0u; + + //first setArg - called + EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); + EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); + + //same setArg but allocationCounter == 0 - called + EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); + EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); + + //same setArg - not called and argInfo.allocationCounter is updated + ++svmAllocsManager->allocationsCounter; + EXPECT_EQ(0u, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); + EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); + EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); + EXPECT_EQ(svmAllocsManager->allocationsCounter, mockKernel.kernelArgInfos[0].allocIdMemoryManagerCounter); + + //same setArg and allocationCounter - not called + EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); + EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); + + //same setArg but different allocId - called + svmAllocsManager->getSVMAlloc(svmAllocation)->setAllocId(1u); + ++svmAllocsManager->allocationsCounter; + 
EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(svmAllocation), &svmAllocation)); + EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); + + //different value - called + auto secondSvmAllocation = svmAllocsManager->createSVMAlloc(4096, allocationProperties, context->rootDeviceIndices, context->deviceBitfields); + EXPECT_EQ(ZE_RESULT_SUCCESS, mockKernel.setArgBuffer(0, sizeof(secondSvmAllocation), &secondSvmAllocation)); + EXPECT_EQ(++callCounter, mockKernel.setArgBufferWithAllocCalled); + + //same value but no svmData - ZE_RESULT_ERROR_INVALID_ARGUMENT + svmAllocsManager->freeSVMAlloc(secondSvmAllocation); + ++svmAllocsManager->allocationsCounter; + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, mockKernel.setArgBuffer(0, sizeof(secondSvmAllocation), &secondSvmAllocation)); + EXPECT_EQ(callCounter, mockKernel.setArgBufferWithAllocCalled); + + svmAllocsManager->freeSVMAlloc(svmAllocation); +} + using KernelImpSetGroupSizeTest = Test; TEST_F(KernelImpSetGroupSizeTest, WhenCalculatingLocalIdsThenGrfSizeIsTakenFromCapabilityTable) {