Add new flag to disable L3 for stateful accesses.
- With this flag resource will not be cached in L3 for stateful accesses. Change-Id: Icf9a393ab92d55c2cdf30444420ea40da0d5630c Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
parent
08a3046e4d
commit
33f6c7f0da
|
@ -21,6 +21,7 @@ struct MemoryFlags {
|
|||
uint32_t accessFlagsUnrestricted : 1;
|
||||
uint32_t noAccess : 1;
|
||||
uint32_t locallyUncachedResource : 1;
|
||||
uint32_t locallyUncachedInSurfaceState : 1;
|
||||
uint32_t allowUnrestrictedSize : 1;
|
||||
uint32_t forceSharedPhysicalMemory : 1;
|
||||
};
|
||||
|
|
|
@ -54,6 +54,7 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
|
|||
|
||||
#define CL_MEM_FLAGS_INTEL 0x10001
|
||||
#define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18)
|
||||
#define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 21)
|
||||
|
||||
// Used with clEnqueueVerifyMemory
|
||||
#define CL_MEM_COMPARE_EQUAL 0u
|
||||
|
|
|
@ -63,6 +63,11 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M
|
|||
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) {
|
||||
memoryPropertiesFlags.flags.locallyUncachedResource = true;
|
||||
}
|
||||
|
||||
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) {
|
||||
memoryPropertiesFlags.flags.locallyUncachedInSurfaceState = true;
|
||||
}
|
||||
|
||||
if (isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) {
|
||||
memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true;
|
||||
}
|
||||
|
@ -72,4 +77,4 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M
|
|||
return memoryPropertiesFlags;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
} // namespace NEO
|
||||
|
|
|
@ -1204,8 +1204,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
|||
if (requiresSshForBuffers()) {
|
||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly);
|
||||
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
||||
}
|
||||
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
||||
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
||||
return CL_SUCCESS;
|
||||
} else {
|
||||
|
|
|
@ -551,7 +551,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons
|
|||
isAligned<MemoryConstants::cacheLineSize>(bufferSize);
|
||||
|
||||
auto gmmHelper = executionEnvironment->getGmmHelper();
|
||||
if (!disableL3Cache && !isMemObjUncacheable() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
|
||||
if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
|
||||
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
} else {
|
||||
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
||||
|
|
|
@ -236,6 +236,10 @@ bool MemObj::isMemObjUncacheable() const {
|
|||
return isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
||||
}
|
||||
|
||||
bool MemObj::isMemObjUncacheableForSurfaceState() const {
|
||||
return isAnyBitSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
||||
}
|
||||
|
||||
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
|
||||
return graphicsAllocation;
|
||||
}
|
||||
|
|
|
@ -76,6 +76,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
|||
bool isMemObjZeroCopy() const;
|
||||
bool isMemObjWithHostPtrSVM() const;
|
||||
bool isMemObjUncacheable() const;
|
||||
bool isMemObjUncacheableForSurfaceState() const;
|
||||
virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
||||
virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
||||
|
||||
|
|
|
@ -138,7 +138,7 @@ class MemObjHelper {
|
|||
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
|
||||
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
|
||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
|
||||
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE;
|
||||
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||
}
|
||||
|
||||
static inline void addImageMemoryProperties(MemoryProperties &properties) {
|
||||
|
|
|
@ -115,7 +115,7 @@ TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBuffe
|
|||
|
||||
static cl_mem_flags validFlagsIntel[] = {
|
||||
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
|
||||
};
|
||||
CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(
|
||||
CreateBufferCheckFlagsIntel,
|
||||
|
|
|
@ -53,6 +53,7 @@ const size_t localWorkSize[3] = {256, 1, 1};
|
|||
|
||||
const cl_mem_properties_intel *propertiesCacheable = nullptr;
|
||||
const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
|
||||
const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0};
|
||||
|
||||
using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
|
||||
|
||||
|
@ -124,6 +125,74 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR
|
|||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
}
|
||||
|
||||
HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
||||
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
|
||||
auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
|
||||
|
||||
auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
|
||||
auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
|
||||
auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
|
||||
|
||||
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
|
||||
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||
|
||||
EXPECT_TRUE(kernel->isPatched());
|
||||
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||
}
|
||||
|
||||
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
||||
|
|
|
@ -29,7 +29,7 @@ TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesThen
|
|||
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR |
|
||||
CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS,
|
||||
CL_MEM_FLAGS_INTEL,
|
||||
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
|
||||
CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE,
|
||||
0};
|
||||
|
||||
MemoryProperties propertiesStruct;
|
||||
|
@ -67,4 +67,4 @@ TEST(MemoryPropertiesParser, givenDifferentParametersWhenCallingFillCachePolicyI
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,6 +56,10 @@ TEST(MemoryPropertiesFlags, givenValidPropertiesWhenCreateMemoryPropertiesFlagsT
|
|||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
||||
EXPECT_TRUE(properties.flags.locallyUncachedResource);
|
||||
|
||||
memoryProperties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
||||
EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState);
|
||||
|
||||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL);
|
||||
EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory);
|
||||
}
|
||||
|
@ -108,4 +112,4 @@ TEST(MemoryPropertiesFlags, givenClAllowUnrestrictedSizeFlagWhenCreateMemoryProp
|
|||
memoryProperties.flags_intel = 0;
|
||||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
||||
EXPECT_FALSE(properties.flags.allowUnrestrictedSize);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -79,6 +79,10 @@ TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsR
|
|||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||
|
||||
properties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||
|
||||
properties.flags = 0;
|
||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||
|
|
Loading…
Reference in New Issue