Add new flag to disable L3 for stateful accesses.

- With this flag resource will not be cached in L3 for stateful accesses.

Change-Id: Icf9a393ab92d55c2cdf30444420ea40da0d5630c
Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
Mrozek, Michal 2019-08-30 08:18:34 +02:00
parent 08a3046e4d
commit 33f6c7f0da
13 changed files with 97 additions and 8 deletions

View File

@ -21,6 +21,7 @@ struct MemoryFlags {
uint32_t accessFlagsUnrestricted : 1;
uint32_t noAccess : 1;
uint32_t locallyUncachedResource : 1;
uint32_t locallyUncachedInSurfaceState : 1;
uint32_t allowUnrestrictedSize : 1;
uint32_t forceSharedPhysicalMemory : 1;
};

View File

@ -54,6 +54,7 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
#define CL_MEM_FLAGS_INTEL 0x10001
#define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18)
#define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 21)
// Used with clEnqueueVerifyMemory
#define CL_MEM_COMPARE_EQUAL 0u

View File

@ -63,6 +63,11 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) {
memoryPropertiesFlags.flags.locallyUncachedResource = true;
}
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) {
memoryPropertiesFlags.flags.locallyUncachedInSurfaceState = true;
}
if (isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) {
memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true;
}
@ -72,4 +77,4 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M
return memoryPropertiesFlags;
}
} // namespace NEO
} // namespace NEO

View File

@ -1204,8 +1204,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
if (requiresSshForBuffers()) {
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly);
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
}
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
return CL_SUCCESS;
} else {

View File

@ -551,7 +551,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons
isAligned<MemoryConstants::cacheLineSize>(bufferSize);
auto gmmHelper = executionEnvironment->getGmmHelper();
if (!disableL3Cache && !isMemObjUncacheable() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
} else {
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);

View File

@ -236,6 +236,10 @@ bool MemObj::isMemObjUncacheable() const {
return isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
bool MemObj::isMemObjUncacheableForSurfaceState() const {
return isAnyBitSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
}
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
return graphicsAllocation;
}

View File

@ -76,6 +76,7 @@ class MemObj : public BaseObject<_cl_mem> {
bool isMemObjZeroCopy() const;
bool isMemObjWithHostPtrSVM() const;
bool isMemObjUncacheable() const;
bool isMemObjUncacheableForSurfaceState() const;
virtual void transferDataToHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };
virtual void transferDataFromHostPtr(MemObjSizeArray &copySize, MemObjOffsetArray &copyOffset) { UNRECOVERABLE_IF(true); };

View File

@ -138,7 +138,7 @@ class MemObjHelper {
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE;
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
}
static inline void addImageMemoryProperties(MemoryProperties &properties) {

View File

@ -115,7 +115,7 @@ TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBuffe
static cl_mem_flags validFlagsIntel[] = {
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
};
CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE};
INSTANTIATE_TEST_CASE_P(
CreateBufferCheckFlagsIntel,

View File

@ -53,6 +53,7 @@ const size_t localWorkSize[3] = {256, 1, 1};
const cl_mem_properties_intel *propertiesCacheable = nullptr;
const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0};
using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
@ -124,6 +125,74 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
}
HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
EXPECT_EQ(CL_SUCCESS, retVal);
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
EXPECT_TRUE(kernel->isPatched());
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
}
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
cl_int retVal = CL_SUCCESS;
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));

View File

@ -29,7 +29,7 @@ TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesThen
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR |
CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS,
CL_MEM_FLAGS_INTEL,
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE,
0};
MemoryProperties propertiesStruct;
@ -67,4 +67,4 @@ TEST(MemoryPropertiesParser, givenDifferentParametersWhenCallingFillCachePolicyI
}
}
}
}
}

View File

@ -56,6 +56,10 @@ TEST(MemoryPropertiesFlags, givenValidPropertiesWhenCreateMemoryPropertiesFlagsT
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
EXPECT_TRUE(properties.flags.locallyUncachedResource);
memoryProperties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState);
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL);
EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory);
}
@ -108,4 +112,4 @@ TEST(MemoryPropertiesFlags, givenClAllowUnrestrictedSizeFlagWhenCreateMemoryProp
memoryProperties.flags_intel = 0;
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
EXPECT_FALSE(properties.flags.allowUnrestrictedSize);
}
}

View File

@ -79,6 +79,10 @@ TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsR
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
properties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
properties.flags = 0;
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));