Improve uncached resources handling.

- Change kernel to properly detect true stateless resources
- do not turn of stateless l3 if arg is used in pure stateful manner
- refactor variable names to better reflect what they do
- improve mock kernel with internal to have setKernelArg capabilties

Change-Id: I2cdde04f2144d9b86dc1486126632db0fd7cad49
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
Mrozek, Michal
2019-08-30 09:55:44 +02:00
parent 33f6c7f0da
commit ae201a47d3
5 changed files with 164 additions and 50 deletions

View File

@@ -621,7 +621,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired;
numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel);
specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode();
if (kernel->hasUncacheableArgs()) {
if (kernel->hasUncacheableStatelessArgs()) {
anyUncacheableArgs = true;
}
}

View File

@@ -811,7 +811,7 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
if (argIndex >= kernelArgHandlers.size()) {
return CL_INVALID_ARG_INDEX;
}
argWasUncacheable = kernelArguments[argIndex].isUncacheable;
argWasUncacheable = kernelArguments[argIndex].isStatelessUncacheable;
auto argHandler = kernelArgHandlers[argIndex];
retVal = (this->*argHandler)(argIndex, argSize, argVal);
}
@@ -820,8 +820,8 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) {
patchedArgumentsNum++;
kernelArguments[argIndex].isPatched = true;
}
auto argIsUncacheable = kernelArguments[argIndex].isUncacheable;
uncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
auto argIsUncacheable = kernelArguments[argIndex].isStatelessUncacheable;
statelessUncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0);
resolveArgs();
}
return retVal;
@@ -1205,7 +1205,9 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly);
}
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
kernelArguments[argIndex].isStatelessUncacheable = !kernelArgInfo.pureStatefulBufferAccess ? buffer->isMemObjUncacheable() : false;
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
return CL_SUCCESS;
} else {
@@ -1510,9 +1512,9 @@ void Kernel::unsetArg(uint32_t argIndex) {
if (kernelArguments[argIndex].isPatched) {
patchedArgumentsNum--;
kernelArguments[argIndex].isPatched = false;
if (kernelArguments[argIndex].isUncacheable) {
uncacheableArgsCount--;
kernelArguments[argIndex].isUncacheable = false;
if (kernelArguments[argIndex].isStatelessUncacheable) {
statelessUncacheableArgsCount--;
kernelArguments[argIndex].isStatelessUncacheable = false;
}
}
}

View File

@@ -59,7 +59,7 @@ class Kernel : public BaseObject<_cl_kernel> {
GraphicsAllocation *pSvmAlloc;
cl_mem_flags svmFlags;
bool isPatched = false;
bool isUncacheable = false;
bool isStatelessUncacheable = false;
};
typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex,
@@ -290,7 +290,7 @@ class Kernel : public BaseObject<_cl_kernel> {
bool requiresCoherency();
void resetSharedObjectsPatchAddresses();
bool isUsingSharedObjArgs() const { return usingSharedObjArgs; }
bool hasUncacheableArgs() const { return uncacheableArgsCount > 0; }
bool hasUncacheableStatelessArgs() const { return statelessUncacheableArgsCount > 0; }
bool hasPrintfOutput() const;
@@ -515,7 +515,7 @@ class Kernel : public BaseObject<_cl_kernel> {
bool containsStatelessWrites = true;
uint32_t patchedArgumentsNum = 0;
uint32_t startOffset = 0;
uint32_t uncacheableArgsCount = 0;
uint32_t statelessUncacheableArgsCount = 0;
std::vector<PatchInfoData> patchInfoDataList;
std::unique_ptr<ImageTransformer> imageTransformer;

View File

@@ -59,8 +59,11 @@ using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFix
HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
MockKernelWithInternals mockKernel(*this->pDevice, context, true);
mockKernel.kernelInfo.usesSsh = true;
mockKernel.kernelInfo.requiresSshForBuffers = true;
auto kernel = mockKernel.mockKernel;
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
@@ -75,59 +78,69 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_TRUE(kernel->hasUncacheableStatelessArgs());
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
}
HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
MockKernelWithInternals mockKernel(*this->pDevice, context, true);
auto kernel = mockKernel.mockKernel;
mockKernel.kernelInfo.usesSsh = true;
mockKernel.kernelInfo.requiresSshForBuffers = true;
EXPECT_EQ(CL_SUCCESS, retVal);
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
@@ -143,59 +156,145 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfa
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
}
HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedButKernelDoesntHaveAnyStatelessAccessessThenSurfacesAreNotRecordedAsUncacheable) {
cl_int retVal = CL_SUCCESS;
MockKernelWithInternals mockKernel(*this->pDevice, context, true);
auto kernel = mockKernel.mockKernel;
mockKernel.kernelInfo.usesSsh = true;
mockKernel.kernelInfo.requiresSshForBuffers = true;
mockKernel.kernelInfo.kernelArgInfo[0].pureStatefulBufferAccess = true;
mockKernel.kernelInfo.kernelArgInfo[1].pureStatefulBufferAccess = true;
EXPECT_EQ(CL_SUCCESS, retVal);
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferUncacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
EXPECT_FALSE(kernel->hasUncacheableStatelessArgs());
}
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
MockKernelWithInternals mockKernel(*this->pDevice, context, true);
auto kernel = mockKernel.mockKernel;
mockKernel.kernelInfo.usesSsh = true;
mockKernel.kernelInfo.requiresSshForBuffers = true;
EXPECT_EQ(CL_SUCCESS, retVal);
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
@@ -209,47 +308,47 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFr
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
retVal = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
kernel->unsetArg(0);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
kernel->unsetArg(0);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable);
retVal = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferUncacheable);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel, 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, cmdQueueMocs<FamilyType>(pCmdQ));
}

View File

@@ -294,15 +294,28 @@ class MockKernelWithInternals {
mockKernel->setSshLocal(&sshLocal, sizeof(sshLocal));
if (addDefaultArg) {
defaultKernelArguments.resize(1);
defaultKernelArguments.resize(2);
defaultKernelArguments[0] = {};
kernelInfo.resizeKernelArgInfoAndRegisterParameter(1);
kernelInfo.kernelArgInfo.resize(1);
defaultKernelArguments[1] = {};
kernelInfo.resizeKernelArgInfoAndRegisterParameter(2);
kernelInfo.kernelArgInfo.resize(2);
kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector.resize(1);
kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].crossthreadOffset = 0;
kernelInfo.kernelArgInfo[0].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t);
kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector.resize(1);
kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].crossthreadOffset = 0;
kernelInfo.kernelArgInfo[1].kernelArgPatchInfoVector[0].size = sizeof(uintptr_t);
mockKernel->setKernelArguments(defaultKernelArguments);
mockKernel->kernelArgRequiresCacheFlush.resize(1);
mockKernel->kernelArgRequiresCacheFlush.resize(2);
mockKernel->kernelArgHandlers.resize(2);
mockKernel->kernelArgHandlers[0] = &Kernel::setArgBuffer;
mockKernel->kernelArgHandlers[1] = &Kernel::setArgBuffer;
kernelInfo.kernelArgInfo[1].offsetHeap = 64;
kernelInfo.kernelArgInfo[0].offsetHeap = 64;
}
}
~MockKernelWithInternals() {