diff --git a/runtime/command_queue/enqueue_common.h b/runtime/command_queue/enqueue_common.h index da7794714e..92b93bcaa3 100644 --- a/runtime/command_queue/enqueue_common.h +++ b/runtime/command_queue/enqueue_common.h @@ -527,6 +527,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( uint32_t numGrfRequired = GrfConfig::DefaultGrfNumber; auto specialPipelineSelectMode = false; Kernel *kernel = nullptr; + bool anyUncacheableArgs = false; for (auto &dispatchInfo : multiDispatchInfo) { if (kernel != dispatchInfo.getKernel()) { kernel = dispatchInfo.getKernel(); @@ -539,6 +540,9 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( auto numGrfRequiredByKernel = kernel->getKernelInfo().patchInfo.executionEnvironment->NumGRFRequired; numGrfRequired = std::max(numGrfRequired, numGrfRequiredByKernel); specialPipelineSelectMode |= kernel->requiresSpecialPipelineSelectMode(); + if (kernel->hasUncacheableArgs()) { + anyUncacheableArgs = true; + } } if (mediaSamplerRequired) { @@ -579,6 +583,10 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( } } + if (anyUncacheableArgs) { + getCommandStreamReceiver().setDisableL3Cache(true); + } + DispatchFlags dispatchFlags; dispatchFlags.blocking = blocking; dispatchFlags.dcFlush = shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC; diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 2053aff11e..5311879526 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -170,12 +170,13 @@ class CommandStreamReceiver { virtual void expectMemory(const void *gfxAddress, const void *srcAddress, size_t length, uint32_t compareOperation); - protected: - void cleanupResources(); void setDisableL3Cache(bool val) { disableL3Cache = val; } + protected: + void cleanupResources(); + std::unique_ptr flushStamp; std::unique_ptr submissionAggregator; std::unique_ptr flatBatchBufferHelper; diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index c7bc814a43..3a83db80ee 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -784,6 +784,7 @@ uint32_t Kernel::getScratchSizeValueToProgramMediaVfeState(int scratchSize) { cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { cl_int retVal = CL_SUCCESS; bool updateExposedKernel = true; + auto argWasUncacheable = false; if (getKernelInfo().builtinDispatchBuilder != nullptr) { updateExposedKernel = getKernelInfo().builtinDispatchBuilder->setExplicitArg(argIndex, argSize, argVal, retVal); } @@ -791,6 +792,7 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { if (argIndex >= kernelArgHandlers.size()) { return CL_INVALID_ARG_INDEX; } + argWasUncacheable = kernelArguments[argIndex].isUncacheable; auto argHandler = kernelArgHandlers[argIndex]; retVal = (this->*argHandler)(argIndex, argSize, argVal); } @@ -799,6 +801,8 @@ cl_int Kernel::setArg(uint32_t argIndex, size_t argSize, const void *argVal) { patchedArgumentsNum++; kernelArguments[argIndex].isPatched = true; } + auto argIsUncacheable = kernelArguments[argIndex].isUncacheable; + uncacheableArgsCount += (argIsUncacheable ? 1 : 0) - (argWasUncacheable ? 1 : 0); resolveArgs(); } return retVal; @@ -1128,6 +1132,7 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode); + kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); } addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); return CL_SUCCESS; @@ -1433,6 +1438,10 @@ void Kernel::unsetArg(uint32_t argIndex) { if (kernelArguments[argIndex].isPatched) { patchedArgumentsNum--; kernelArguments[argIndex].isPatched = false; + if (kernelArguments[argIndex].isUncacheable) { + uncacheableArgsCount--; + kernelArguments[argIndex].isUncacheable = false; + } } } diff --git a/runtime/kernel/kernel.h b/runtime/kernel/kernel.h index d06ea24726..1443fa6302 100644 --- a/runtime/kernel/kernel.h +++ b/runtime/kernel/kernel.h @@ -57,6 +57,7 @@ class Kernel : public BaseObject<_cl_kernel> { GraphicsAllocation *pSvmAlloc; cl_mem_flags svmFlags; bool isPatched = false; + bool isUncacheable = false; }; typedef int32_t (Kernel::*KernelArgHandler)(uint32_t argIndex, @@ -286,6 +287,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool requiresCoherency(); void resetSharedObjectsPatchAddresses(); bool isUsingSharedObjArgs() const { return usingSharedObjArgs; } + bool hasUncacheableArgs() const { return uncacheableArgsCount > 0; } bool hasPrintfOutput() const; @@ -496,6 +498,7 @@ class Kernel : public BaseObject<_cl_kernel> { bool auxTranslationRequired = false; uint32_t patchedArgumentsNum = 0; uint32_t startOffset = 0; + uint32_t uncacheableArgsCount = 0; std::vector patchInfoDataList; std::unique_ptr imageTransformer; diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index ffa2238407..e8241ccb13 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -255,6 +255,7 @@ Buffer *Buffer::create(Context *context, } pBuffer->setHostPtrMinSize(size); + pBuffer->isUncacheable = isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE); if (copyMemoryFromHostPtr) { if ((memory->gmm && memory->gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool())) { diff --git a/runtime/mem_obj/buffer.inl b/runtime/mem_obj/buffer.inl index 336fb066f8..f584795012 100644 --- a/runtime/mem_obj/buffer.inl +++ b/runtime/mem_obj/buffer.inl @@ -5,6 +5,7 @@ * */ +#include "common/helpers/bit_helpers.h" #include "hw_cmds.h" #include "runtime/execution_environment/execution_environment.h" #include "runtime/helpers/surface_formats.h" @@ -64,8 +65,9 @@ void BufferHw::setArgStateful(void *memory, bool forceNonAuxMode) { surfaceState->setTileMode(RENDER_SURFACE_STATE::TILE_MODE_LINEAR); surfaceState->setVerticalLineStride(0); surfaceState->setVerticalLineStrideOffset(0); - if ((isAligned(bufferAddress) && isAligned(bufferSize)) || - ((getFlags() & CL_MEM_READ_ONLY)) != 0 || !this->isMemObjZeroCopy()) { + if (((isAligned(bufferAddress) && isAligned(bufferSize)) || + isValueSet(getFlags(), CL_MEM_READ_ONLY) || !this->isMemObjZeroCopy()) && + !this->isUncacheable) { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); } else { surfaceState->setMemoryObjectControlState(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); diff --git a/runtime/mem_obj/mem_obj.cpp b/runtime/mem_obj/mem_obj.cpp index 4e72ab9554..eada49b90c 100644 --- a/runtime/mem_obj/mem_obj.cpp +++ b/runtime/mem_obj/mem_obj.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -219,6 +219,10 @@ bool MemObj::isMemObjWithHostPtrSVM() const { return isHostPtrSVM; } +bool MemObj::isMemObjUncacheable() const { + return isUncacheable; +} + GraphicsAllocation *MemObj::getGraphicsAllocation() { return graphicsAllocation; } diff --git a/runtime/mem_obj/mem_obj.h b/runtime/mem_obj/mem_obj.h index cdc7609c02..ac44e9441f 100644 --- a/runtime/mem_obj/mem_obj.h +++ b/runtime/mem_obj/mem_obj.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2017-2018 Intel Corporation + * Copyright (C) 2017-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -71,6 +71,7 @@ class MemObj : public BaseObject<_cl_mem> { bool isMemObjZeroCopy() const; bool isMemObjWithHostPtrSVM() const; + bool isMemObjUncacheable() const; virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; @@ -126,6 +127,7 @@ class MemObj : public BaseObject<_cl_mem> { bool isZeroCopy; bool isHostPtrSVM; bool isObjectRedescribed; + bool isUncacheable = false; MemoryManager *memoryManager = nullptr; GraphicsAllocation *graphicsAllocation; GraphicsAllocation *mcsAllocation = nullptr; diff --git a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp index efd76ea4ce..baf7ccb3e6 100644 --- a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp @@ -17,71 +17,172 @@ #include "test.h" #include "unit_tests/fixtures/hello_world_fixture.h" #include "unit_tests/helpers/hw_parse.h" +#include "unit_tests/utilities/base_object_utils.h" using namespace OCLRT; namespace clMemLocallyUncachedResourceTests { -struct clMemLocallyUncachedResourceFixture : Test>, - ::testing::WithParamInterface {}; - -HWTEST_P(clMemLocallyUncachedResourceFixture, GivenLocallyCachedOrUncachedBufferWhenItIsSetAndQueuedThenItIsCorrectlyCached) { +template +uint32_t argMocs(Kernel &kernel, size_t argIndex) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; + auto surfaceStateHeapAddress = kernel.getSurfaceStateHeap(); + auto surfaceStateHeapAddressOffset = kernel.getKernelInfo().kernelArgInfo[argIndex].offsetHeap; + auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); + return surfaceState->getMemoryObjectControlState(); +} + +template +uint32_t cmdQueueMocs(CommandQueue *pCmdQ) { using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + auto pCmdQHw = reinterpret_cast *>(pCmdQ); + auto &csr = pCmdQHw->getCommandStreamReceiver(); + HardwareParse hwParse; + hwParse.parseCommands(csr.getCS(0), 0); + auto itorCmd = reverse_find(hwParse.cmdList.rbegin(), hwParse.cmdList.rend()); + EXPECT_NE(hwParse.cmdList.rend(), itorCmd); + auto sba = genCmdCast(*itorCmd); + EXPECT_NE(nullptr, sba); - const size_t n = 512; - size_t globalWorkSize[3] = {n, 1, 1}; - size_t localWorkSize[3] = {256, 1, 1}; - bool useUncachedFlag = GetParam(); + return sba->getStatelessDataPortAccessMemoryObjectControlState(); +} +const size_t n = 512; +const size_t globalWorkSize[3] = {n, 1, 1}; +const size_t localWorkSize[3] = {256, 1, 1}; + +const cl_mem_properties_intel *propertiesCacheable = nullptr; +const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; + +using clMemLocallyUncachedResourceFixture = Test>; + +HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableResourceWhenSettingKernelArgumentsThenKernelIsUncacheable) { cl_int retVal = CL_SUCCESS; std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); EXPECT_EQ(CL_SUCCESS, retVal); - cl_mem_properties_intel propertiesUncached[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; - cl_mem_properties_intel *properties = (useUncachedFlag ? propertiesUncached : nullptr); - auto buffer1 = clCreateBufferWithPropertiesINTEL(context, properties, n * sizeof(float), nullptr, nullptr); - auto buffer2 = clCreateBufferWithPropertiesINTEL(context, properties, n * sizeof(float), nullptr, nullptr); + auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); + auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); - retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &buffer1); - EXPECT_EQ(CL_SUCCESS, retVal); - auto surfaceStateHeapAddress = kernel.get()->getSurfaceStateHeap(); - auto surfaceStateHeapAddressOffset = kernel.get()->getKernelInfo().kernelArgInfo[0].offsetHeap; - auto surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); - auto expectedMocs = pDevice->getGmmHelper()->getMOCS(useUncachedFlag ? GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED - : GMM_RESOURCE_USAGE_OCL_BUFFER); - EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); + auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); + auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); - retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &buffer2); + auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); EXPECT_EQ(CL_SUCCESS, retVal); - surfaceStateHeapAddressOffset = kernel.get()->getKernelInfo().kernelArgInfo[1].offsetHeap; - surfaceState = reinterpret_cast(ptrOffset(surfaceStateHeapAddress, surfaceStateHeapAddressOffset)); - EXPECT_EQ(expectedMocs, surfaceState->getMemoryObjectControlState()); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); EXPECT_TRUE(kernel->isPatched()); retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); - auto pCmdQHw = reinterpret_cast *>(pCmdQ); - ASSERT_NE(nullptr, pCmdQHw); - auto &csr = pCmdQHw->getCommandStreamReceiver(); - HardwareParse hwParse; - hwParse.parseCommands(csr.getCS(0), 0); - auto itorCmd = find(hwParse.cmdList.begin(), hwParse.cmdList.end()); - EXPECT_NE(hwParse.cmdList.end(), itorCmd); - auto sba = genCmdCast(*itorCmd); - ASSERT_NE(nullptr, sba); - - EXPECT_EQ(expectedMocs, sba->getStatelessDataPortAccessMemoryObjectControlState()); - - retVal = clReleaseMemObject(buffer1); + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1); EXPECT_EQ(CL_SUCCESS, retVal); - retVal = clReleaseMemObject(buffer2); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); } -INSTANTIATE_TEST_CASE_P(clMemLocallyUncachedResourceTest, - clMemLocallyUncachedResourceFixture, - ::testing::Bool()); +HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) { + cl_int retVal = CL_SUCCESS; + std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); + EXPECT_EQ(CL_SUCCESS, retVal); + + auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); + auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); + + auto bufferUncacheable = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable = clUniquePtr(castToObject(bufferUncacheable)); + + auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); + + kernel->unsetArg(0); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + kernel->unsetArg(0); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, cmdQueueMocs(pCmdQ)); +} } // namespace clMemLocallyUncachedResourceTests diff --git a/unit_tests/mocks/mock_gmm_client_context_base.cpp b/unit_tests/mocks/mock_gmm_client_context_base.cpp index 1489045a68..668f463c59 100644 --- a/unit_tests/mocks/mock_gmm_client_context_base.cpp +++ b/unit_tests/mocks/mock_gmm_client_context_base.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,6 +14,9 @@ MockGmmClientContextBase::MockGmmClientContextBase(GMM_CLIENT clientType, GmmExp MEMORY_OBJECT_CONTROL_STATE MockGmmClientContextBase::cachePolicyGetMemoryObject(GMM_RESOURCE_INFO *pResInfo, GMM_RESOURCE_USAGE_TYPE usage) { MEMORY_OBJECT_CONTROL_STATE retVal = {}; memset(&retVal, 0, sizeof(MEMORY_OBJECT_CONTROL_STATE)); + if (usage != GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) { + retVal.DwordValue = 4u; + } return retVal; }