diff --git a/core/memory_properties/memory_properties_flags_common.inl b/core/memory_properties/memory_properties_flags_common.inl index 0f2361f9d3..6e73031923 100644 --- a/core/memory_properties/memory_properties_flags_common.inl +++ b/core/memory_properties/memory_properties_flags_common.inl @@ -21,6 +21,7 @@ struct MemoryFlags { uint32_t accessFlagsUnrestricted : 1; uint32_t noAccess : 1; uint32_t locallyUncachedResource : 1; + uint32_t locallyUncachedInSurfaceState : 1; uint32_t allowUnrestrictedSize : 1; uint32_t forceSharedPhysicalMemory : 1; }; diff --git a/public/cl_ext_private.h b/public/cl_ext_private.h index b08461a43c..5a8b5a280d 100644 --- a/public/cl_ext_private.h +++ b/public/cl_ext_private.h @@ -54,6 +54,7 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield; #define CL_MEM_FLAGS_INTEL 0x10001 #define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18) +#define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 21) // Used with clEnqueueVerifyMemory #define CL_MEM_COMPARE_EQUAL 0u diff --git a/runtime/helpers/memory_properties_flags_helpers_base.inl b/runtime/helpers/memory_properties_flags_helpers_base.inl index bb1459005f..c72454ec26 100644 --- a/runtime/helpers/memory_properties_flags_helpers_base.inl +++ b/runtime/helpers/memory_properties_flags_helpers_base.inl @@ -63,6 +63,11 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) { memoryPropertiesFlags.flags.locallyUncachedResource = true; } + + if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) { + memoryPropertiesFlags.flags.locallyUncachedInSurfaceState = true; + } + if (isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) { memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true; } @@ -72,4 +77,4 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M return memoryPropertiesFlags; } -} // namespace NEO \ No newline at end of file +} // namespace NEO diff --git a/runtime/kernel/kernel.cpp b/runtime/kernel/kernel.cpp index efe3630ce4..fa66af5ac7 100644 --- a/runtime/kernel/kernel.cpp +++ b/runtime/kernel/kernel.cpp @@ -1204,8 +1204,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex, if (requiresSshForBuffers()) { auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap); buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly); - kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); } + kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable(); addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation()); return CL_SUCCESS; } else { diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index 49411abbf1..7603c00ad1 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -551,7 +551,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons isAligned(bufferSize); auto gmmHelper = executionEnvironment->getGmmHelper(); - if (!disableL3Cache && !isMemObjUncacheable() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) { + if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) { return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); } else { return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); diff --git a/runtime/mem_obj/mem_obj.cpp b/runtime/mem_obj/mem_obj.cpp index 3a938ed636..7446a2ecb2 100644 --- a/runtime/mem_obj/mem_obj.cpp +++ b/runtime/mem_obj/mem_obj.cpp @@ -236,6 +236,10 @@ bool MemObj::isMemObjUncacheable() const { return isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE); } +bool MemObj::isMemObjUncacheableForSurfaceState() const { + return isAnyBitSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE); +} + GraphicsAllocation *MemObj::getGraphicsAllocation() const { return graphicsAllocation; } diff --git a/runtime/mem_obj/mem_obj.h b/runtime/mem_obj/mem_obj.h index 8811188d79..708052ecd4 100644 --- a/runtime/mem_obj/mem_obj.h +++ b/runtime/mem_obj/mem_obj.h @@ -76,6 +76,7 @@ class MemObj : public BaseObject<_cl_mem> { bool isMemObjZeroCopy() const; bool isMemObjWithHostPtrSVM() const; bool isMemObjUncacheable() const; + bool isMemObjUncacheableForSurfaceState() const; virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); }; diff --git a/runtime/mem_obj/mem_obj_helper.h b/runtime/mem_obj/mem_obj_helper.h index 104731c65e..7ff552cf3b 100644 --- a/runtime/mem_obj/mem_obj_helper.h +++ b/runtime/mem_obj/mem_obj_helper.h @@ -138,7 +138,7 @@ class MemObjHelper { CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; - properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE; + properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE; } static inline void addImageMemoryProperties(MemoryProperties &properties) { diff --git a/unit_tests/api/cl_create_buffer_tests.cpp b/unit_tests/api/cl_create_buffer_tests.cpp index 8518cbedee..127a7c6be9 100644 --- a/unit_tests/api/cl_create_buffer_tests.cpp +++ b/unit_tests/api/cl_create_buffer_tests.cpp @@ -115,7 +115,7 @@ TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBuffe static cl_mem_flags validFlagsIntel[] = { CL_MEM_LOCALLY_UNCACHED_RESOURCE, -}; + CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE}; INSTANTIATE_TEST_CASE_P( CreateBufferCheckFlagsIntel, diff --git a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp index 6db042b174..171d0a9425 100644 --- a/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp +++ b/unit_tests/api/cl_mem_locally_uncached_resource_tests.cpp @@ -53,6 +53,7 @@ const size_t localWorkSize[3] = {256, 1, 1}; const cl_mem_properties_intel *propertiesCacheable = nullptr; const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0}; +const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0}; using clMemLocallyUncachedResourceFixture = Test>; @@ -124,6 +125,74 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); } +HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) { + cl_int retVal = CL_SUCCESS; + std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); + EXPECT_EQ(CL_SUCCESS, retVal); + + auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable1 = clUniquePtr(castToObject(bufferCacheable1)); + auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr); + auto pBufferCacheable2 = clUniquePtr(castToObject(bufferCacheable2)); + + auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable1 = clUniquePtr(castToObject(bufferUncacheable1)); + auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr); + auto pBufferUncacheable2 = clUniquePtr(castToObject(bufferUncacheable2)); + + auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsUncacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 0)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); + + retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, argMocs(*kernel, 1)); + + EXPECT_TRUE(kernel->isPatched()); + retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(mocsCacheable, cmdQueueMocs(pCmdQ)); +} + HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) { cl_int retVal = CL_SUCCESS; std::unique_ptr kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal)); diff --git a/unit_tests/helpers/mem_properties_parser_helper_tests.cpp b/unit_tests/helpers/mem_properties_parser_helper_tests.cpp index 986f5229a4..33aa013847 100644 --- a/unit_tests/helpers/mem_properties_parser_helper_tests.cpp +++ b/unit_tests/helpers/mem_properties_parser_helper_tests.cpp @@ -29,7 +29,7 @@ TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesThen CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS, CL_MEM_FLAGS_INTEL, - CL_MEM_LOCALLY_UNCACHED_RESOURCE, + CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0}; MemoryProperties propertiesStruct; @@ -67,4 +67,4 @@ TEST(MemoryPropertiesParser, givenDifferentParametersWhenCallingFillCachePolicyI } } } -} \ No newline at end of file +} diff --git a/unit_tests/helpers/memory_properties_flags_helpers_tests.cpp b/unit_tests/helpers/memory_properties_flags_helpers_tests.cpp index c5965e1b11..6a8615445b 100644 --- a/unit_tests/helpers/memory_properties_flags_helpers_tests.cpp +++ b/unit_tests/helpers/memory_properties_flags_helpers_tests.cpp @@ -56,6 +56,10 @@ TEST(MemoryPropertiesFlags, givenValidPropertiesWhenCreateMemoryPropertiesFlagsT properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties); EXPECT_TRUE(properties.flags.locallyUncachedResource); + memoryProperties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE; + properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties); + EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState); + properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL); EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory); } @@ -108,4 +112,4 @@ TEST(MemoryPropertiesFlags, givenClAllowUnrestrictedSizeFlagWhenCreateMemoryProp memoryProperties.flags_intel = 0; properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties); EXPECT_FALSE(properties.flags.allowUnrestrictedSize); -} \ No newline at end of file +} diff --git a/unit_tests/mem_obj/mem_obj_helper_tests.cpp b/unit_tests/mem_obj/mem_obj_helper_tests.cpp index 9767703947..33cdf87706 100644 --- a/unit_tests/mem_obj/mem_obj_helper_tests.cpp +++ b/unit_tests/mem_obj/mem_obj_helper_tests.cpp @@ -79,6 +79,10 @@ TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsR EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr)); + properties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE; + EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties)); + EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr)); + properties.flags = 0; EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties)); EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));