From ae201a47d34a6c76f74d3cf37a7141046fbfb29b Mon Sep 17 00:00:00 2001 From: "Mrozek, Michal" Date: Fri, 30 Aug 2019 09:55:44 +0200 Subject: [PATCH] Improve uncached resources handling. - Change kernel to properly detect true stateless resources - do not turn of stateless l3 if arg is used in pure stateful manner - refactor variable names to better reflect what they do - improve mock kernel with internal to have setKernelArg capabilties Change-Id: I2cdde04f2144d9b86dc1486126632db0fd7cad49 Signed-off-by: Mrozek, Michal --- runtime/command_queue/enqueue_common.h | 2 +- runtime/kernel/kernel.cpp | 16 +- runtime/kernel/kernel.h | 6 +- ...cl_mem_locally_uncached_resource_tests.cpp | 169 ++++++++++++++---- unit_tests/mocks/mock_kernel.h | 21 ++- 5 files changed, 164 insertions(+), 50 deletions(-) diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index 3a5035452d..d5af6747bb 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -621,7 +621,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired; numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); - if (kernel->hasUncacheableArgs()) { + if (kernel->hasUncacheableStatelessArgs()) { anyUncacheableArgs = true; } } diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index fa66af5ac7..93af450c0a 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -811,7 +811,7 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { if (argIndex >= kernelArgHandlers.size()) { return CL_INVALID_ARG_INDEX; } - argWasUncacheable = kernelArguments[argIndex].isUncacheable; + argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable; auto argHandler = kernelArgHandlers[argIndex]; retVal = (this->*argHandler)(argIndex, argSize, argVal); } @@ -820,8 +820,8 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } - auto argIsUncacheable = kernelArguments[argIndex].isUncacheable; - uncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0); + auto argIsUncacheable = kernelArguments[argIndex].isStatelessUncacheable; + statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0); resolveArgs(); } return retVal; @@ -1205,7 +1205,9 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly); } - kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); + + kernelArguments[argIndex].isStatelessUncacheable = !kernelArgInfo.pureStatefulBufferAccess ? buffer->isMemObjUncacheable() : false; + addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); return CL_SUCCESS; } else { @@ -1510,9 +1512,9 @@ void Kernel::unsetArg(uint32_t argIndex) { if (kernelArguments[argIndex].isPatched) { patchedArgumentsNum--; kernelArguments[argIndex].isPatched = false; - if (kernelArguments[argIndex].isUncacheable) { - uncacheableArgsCount--; - kernelArguments[argIndex].isUncacheable = false; + if (kernelArguments[argIndex].isStatelessUncacheable) { + statelessUncacheableArgsCount--; + kernelArguments[argIndex].isStatelessUncacheable = false; } } } diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index 2e93d8933e..6a21a374a6 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -59,7 +59,7 @@ class Kernel : public BaseObject<_cl_kernel> { GraphicsAllocation *pSvmAlloc; cl_mem_flags svmFlags; bool isPatched = false; - bool isUncacheable = false; + bool isStatelessUncacheable = false; }; typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex, @@ -290,7 +290,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } - bool hasUncacheableArgs() const { return uncacheableArgsCount > 0; } + bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; } bool hasPrintfOutput() const; @@ -515,7 +515,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool containsStatelessWrites = true; uint32_t patchedArgumentsNum = 0; uint32_t startOffset = 0; - uint32_t uncacheableArgsCount = 0; + uint32_t statelessUncacheableArgsCount = 0; std::vector patchInfoDataList; std::unique_ptr imageTransformer; diff --git a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp index 171d0a9425..2716be1f2f 100644 --- a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp @@ -59,8 +59,11 @@ using clMemLocallyUncachedResourceFixture = Test kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); - EXPECT_EQ(CL_SUCCESS, retVal); + MockKernelWithInternals mockKernel(*this->pDevice, context, true); + mockKernel.kernelInfo.usesSsh = true; + mockKernel.kernelInfo.requiresSshForBuffers = true; + + auto kernel = mockKernel.mockKernel; auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); @@ -75,59 +78,69 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + EXPECT_TRUE(kernel->hasUncacheableStatelessArgs()); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); } HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) { cl_int retVal = CL_SUCCESS; - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); + + MockKernelWithInternals mockKernel(*this->pDevice, context, true); + auto kernel = mockKernel.mockKernel; + mockKernel.kernelInfo.usesSsh = true; + mockKernel.kernelInfo.requiresSshForBuffers = true; + EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); @@ -143,59 +156,145 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfa auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); } +HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) { + cl_int retVal = CL_SUCCESS; + + MockKernelWithInternals mockKernel(*this->pDevice, context, true); + auto kernel = mockKernel.mockKernel; + mockKernel.kernelInfo.usesSsh = true; + mockKernel.kernelInfo.requiresSshForBuffers = true; + mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true; + mockKernel.kernelInfo.kernelArgInfo[1].pureStatefulBufferAccess = true; + + EXPECT_EQ(CL_SUCCESS, retVal); + + auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); + auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); + + auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); + auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); + + auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + EXPECT_FALSE(kernel->hasUncacheableStatelessArgs()); +} + HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) { cl_int retVal = CL_SUCCESS; - std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); + MockKernelWithInternals mockKernel(*this->pDevice, context, true); + auto kernel = mockKernel.mockKernel; + mockKernel.kernelInfo.usesSsh = true; + mockKernel.kernelInfo.requiresSshForBuffers = true; + EXPECT_EQ(CL_SUCCESS, retVal); auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); @@ -209,47 +308,47 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFr auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); kernel->unsetArg(0); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable); + retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); EXPECT_TRUE(kernel->isPatched()); - retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); } diff --git a/unit_tests/mocks/mock_kernel.h b/unit_tests/mocks/mock_kernel.h index 63138934aa..1ff74be1b3 100644 --- a/unit_tests/mocks/mock_kernel.h +++ b/unit_tests/mocks/mock_kernel.h @@ -294,15 +294,28 @@ class MockKernelWithInternals { mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal)); if (addDefaultArg) { - defaultKernelArguments.resize(1); + defaultKernelArguments.resize(2); defaultKernelArguments[0] = {}; - kernelInfo.resizeKernelArgInfoAndRegisterParameter(1); - kernelInfo.kernelArgInfo.resize(1); + defaultKernelArguments[1] = {}; + + kernelInfo.resizeKernelArgInfoAndRegisterParameter(2); + kernelInfo.kernelArgInfo.resize(2); kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1); kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0; kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t); + + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector.resize(1); + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0; + kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t); + mockKernel->setKernelArguments(defaultKernelArguments); - mockKernel->kernelArgRequiresCacheFlush.resize(1); + mockKernel->kernelArgRequiresCacheFlush.resize(2); + mockKernel->kernelArgHandlers.resize(2); + mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer; + mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer; + + kernelInfo.kernelArgInfo[1].offsetHeap = 64; + kernelInfo.kernelArgInfo[0].offsetHeap = 64; } } ~MockKernelWithInternals() {