diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index 93af450c0a..e750b2bb9d 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -1206,9 +1206,16 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly); } - kernelArguments[argIndex].isStatelessUncacheable = !kernelArgInfo.pureStatefulBufferAccess ? buffer->isMemObjUncacheable() : false; + kernelArguments[argIndex].isStatelessUncacheable = kernelArgInfo.pureStatefulBufferAccess ? false : buffer->isMemObjUncacheable(); - addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); + auto allocationForCacheFlush = buffer->getGraphicsAllocation(); + + // If the object is made uncacheable for surface state and there are no stateless accesses, then there is no need to flush caches + if (buffer->isMemObjUncacheableForSurfaceState() && kernelArgInfo.pureStatefulBufferAccess) { + allocationForCacheFlush = nullptr; + } + + addAllocationToCacheFlushVector(argIndex, allocationForCacheFlush); return CL_SUCCESS; } else { diff --git a/runtime/memory_manager/os_agnostic_memory_manager.cpp b/runtime/memory_manager/os_agnostic_memory_manager.cpp index 9af68d5b51..6f4741a150 100644 --- a/runtime/memory_manager/os_agnostic_memory_manager.cpp +++ b/runtime/memory_manager/os_agnostic_memory_manager.cpp @@ -137,7 +137,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocate32BitGraphicsMemoryImpl(con if (ptrAlloc != nullptr) { memoryAllocation = new MemoryAllocation(allocationData.type, ptrAlloc, ptrAlloc, GmmHelper::canonize(gpuAddress), allocationData.size, counter, MemoryPool::System4KBPagesWith32BitGpuAddressing, false, - false, false); + false, allocationData.flags.flushL3); memoryAllocation->set32BitAllocation(true); memoryAllocation->setGpuBaseAddress(GmmHelper::canonize(gfxPartition->getHeapBase(heap))); diff --git 
a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp index 2716be1f2f..4a764a2454 100644 --- a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp @@ -353,4 +353,41 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFr EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); } +HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateAndAreNotUsedInStatelessFashionThenThoseResourcesAreNotRegistredAsResourcesForCacheFlush) { /* With kernel args 0 and 1 marked pureStatefulBufferAccess, sets three differently created buffers as arg 0 in turn and checks kernelArgRequiresCacheFlush[0]: expected null for the buffers created with propertiesUncacheableInSurfaceState and propertiesUncacheable, non-null for the one created with propertiesCacheable. */ + cl_int retVal = CL_SUCCESS; + + MockKernelWithInternals mockKernel(*this->pDevice, context, true); + auto kernel = mockKernel.mockKernel; + mockKernel.kernelInfo.usesSsh = true; + mockKernel.kernelInfo.requiresSshForBuffers = true; + mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true; + mockKernel.kernelInfo.kernelArgInfo[1].pureStatefulBufferAccess = true; + + EXPECT_EQ(CL_SUCCESS, retVal); /* NOTE(review): retVal has not been reassigned since its initialization above, so this check is trivially true. */ + + auto bufferCacheable = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + + auto bufferUncacheableInSurfaceState = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); + auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheableInSurfaceState); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_NE(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); + EXPECT_EQ(CL_SUCCESS, retVal); + + EXPECT_EQ(nullptr, kernel->kernelArgRequiresCacheFlush[0]); + + 
clReleaseMemObject(bufferUncacheableInSurfaceState); + clReleaseMemObject(bufferUncacheable); + clReleaseMemObject(bufferCacheable); +} + } // namespace clMemLocallyUncachedResourceTests diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 1ff74be1b3..7947a0999d 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -29,6 +29,7 @@ class MockKernel : public Kernel { using Kernel::auxTranslationRequired; using Kernel::containsStatelessWrites; using Kernel::isSchedulerKernel; + using Kernel::kernelArgHandlers; using Kernel::kernelArgRequiresCacheFlush; using Kernel::kernelArguments; using Kernel::kernelSvmGfxAllocations; @@ -221,9 +222,6 @@ class MockKernel : public Kernel { std::vector mockCrossThreadData; std::vector mockSshLocal; - // Make protected members from base class publicly accessible in mock class - using Kernel::kernelArgHandlers; - void setUsingSharedArgs(bool usingSharedArgValue) { this->usingSharedObjArgs = usingSharedArgValue; } void makeResident(CommandStreamReceiver &commandStreamReceiver) override;