Add new flag to disable L3 for stateful accesses.
- With this flag resource will not be cached in L3 for stateful accesses. Change-Id: Icf9a393ab92d55c2cdf30444420ea40da0d5630c Signed-off-by: Mrozek, Michal <michal.mrozek@intel.com>
This commit is contained in:
parent
08a3046e4d
commit
33f6c7f0da
|
@ -21,6 +21,7 @@ struct MemoryFlags {
|
||||||
uint32_t accessFlagsUnrestricted : 1;
|
uint32_t accessFlagsUnrestricted : 1;
|
||||||
uint32_t noAccess : 1;
|
uint32_t noAccess : 1;
|
||||||
uint32_t locallyUncachedResource : 1;
|
uint32_t locallyUncachedResource : 1;
|
||||||
|
uint32_t locallyUncachedInSurfaceState : 1;
|
||||||
uint32_t allowUnrestrictedSize : 1;
|
uint32_t allowUnrestrictedSize : 1;
|
||||||
uint32_t forceSharedPhysicalMemory : 1;
|
uint32_t forceSharedPhysicalMemory : 1;
|
||||||
};
|
};
|
||||||
|
|
|
@ -54,6 +54,7 @@ using cl_unified_shared_memory_capabilities_intel = cl_bitfield;
|
||||||
|
|
||||||
#define CL_MEM_FLAGS_INTEL 0x10001
|
#define CL_MEM_FLAGS_INTEL 0x10001
|
||||||
#define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18)
|
#define CL_MEM_LOCALLY_UNCACHED_RESOURCE (1 << 18)
|
||||||
|
#define CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE (1 << 21)
|
||||||
|
|
||||||
// Used with clEnqueueVerifyMemory
|
// Used with clEnqueueVerifyMemory
|
||||||
#define CL_MEM_COMPARE_EQUAL 0u
|
#define CL_MEM_COMPARE_EQUAL 0u
|
||||||
|
|
|
@ -63,6 +63,11 @@ MemoryPropertiesFlags MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(M
|
||||||
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) {
|
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE)) {
|
||||||
memoryPropertiesFlags.flags.locallyUncachedResource = true;
|
memoryPropertiesFlags.flags.locallyUncachedResource = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE)) {
|
||||||
|
memoryPropertiesFlags.flags.locallyUncachedInSurfaceState = true;
|
||||||
|
}
|
||||||
|
|
||||||
if (isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) {
|
if (isValueSet(properties.flags, CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL)) {
|
||||||
memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true;
|
memoryPropertiesFlags.flags.forceSharedPhysicalMemory = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1204,8 +1204,8 @@ cl_int Kernel::setArgBuffer(uint32_t argIndex,
|
||||||
if (requiresSshForBuffers()) {
|
if (requiresSshForBuffers()) {
|
||||||
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
auto surfaceState = ptrOffset(getSurfaceStateHeap(), kernelArgInfo.offsetHeap);
|
||||||
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly);
|
buffer->setArgStateful(surfaceState, forceNonAuxMode, disableL3, isAuxTranslationKernel, kernelArgInfo.isReadOnly);
|
||||||
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
|
||||||
}
|
}
|
||||||
|
kernelArguments[argIndex].isUncacheable = buffer->isMemObjUncacheable();
|
||||||
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
addAllocationToCacheFlushVector(argIndex, buffer->getGraphicsAllocation());
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -551,7 +551,7 @@ uint32_t Buffer::getMocsValue(bool disableL3Cache, bool isReadOnlyArgument) cons
|
||||||
isAligned<MemoryConstants::cacheLineSize>(bufferSize);
|
isAligned<MemoryConstants::cacheLineSize>(bufferSize);
|
||||||
|
|
||||||
auto gmmHelper = executionEnvironment->getGmmHelper();
|
auto gmmHelper = executionEnvironment->getGmmHelper();
|
||||||
if (!disableL3Cache && !isMemObjUncacheable() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
|
if (!disableL3Cache && !isMemObjUncacheableForSurfaceState() && (alignedMemObj || readOnlyMemObj || !isMemObjZeroCopy())) {
|
||||||
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||||
} else {
|
} else {
|
||||||
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
return gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
||||||
|
|
|
@ -236,6 +236,10 @@ bool MemObj::isMemObjUncacheable() const {
|
||||||
return isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
return isValueSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MemObj::isMemObjUncacheableForSurfaceState() const {
|
||||||
|
return isAnyBitSet(properties.flags_intel, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE | CL_MEM_LOCALLY_UNCACHED_RESOURCE);
|
||||||
|
}
|
||||||
|
|
||||||
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
|
GraphicsAllocation *MemObj::getGraphicsAllocation() const {
|
||||||
return graphicsAllocation;
|
return graphicsAllocation;
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,6 +76,7 @@ class MemObj : public BaseObject<_cl_mem> {
|
||||||
bool isMemObjZeroCopy() const;
|
bool isMemObjZeroCopy() const;
|
||||||
bool isMemObjWithHostPtrSVM() const;
|
bool isMemObjWithHostPtrSVM() const;
|
||||||
bool isMemObjUncacheable() const;
|
bool isMemObjUncacheable() const;
|
||||||
|
bool isMemObjUncacheableForSurfaceState() const;
|
||||||
virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
virtual void transferDataToHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
||||||
virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
virtual void transferDataFromHostPtr(MemObjSizeArray ©Size, MemObjOffsetArray ©Offset) { UNRECOVERABLE_IF(true); };
|
||||||
|
|
||||||
|
|
|
@ -138,7 +138,7 @@ class MemObjHelper {
|
||||||
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
|
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY |
|
||||||
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
|
CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR |
|
||||||
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
|
CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS;
|
||||||
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE;
|
properties.flags_intel |= CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void addImageMemoryProperties(MemoryProperties &properties) {
|
static inline void addImageMemoryProperties(MemoryProperties &properties) {
|
||||||
|
|
|
@ -115,7 +115,7 @@ TEST_P(clCreateBufferValidFlagsIntelTests, GivenValidFlagsIntelWhenCreatingBuffe
|
||||||
|
|
||||||
static cl_mem_flags validFlagsIntel[] = {
|
static cl_mem_flags validFlagsIntel[] = {
|
||||||
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
|
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
|
||||||
};
|
CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE};
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(
|
INSTANTIATE_TEST_CASE_P(
|
||||||
CreateBufferCheckFlagsIntel,
|
CreateBufferCheckFlagsIntel,
|
||||||
|
|
|
@ -53,6 +53,7 @@ const size_t localWorkSize[3] = {256, 1, 1};
|
||||||
|
|
||||||
const cl_mem_properties_intel *propertiesCacheable = nullptr;
|
const cl_mem_properties_intel *propertiesCacheable = nullptr;
|
||||||
const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
|
const cl_mem_properties_intel propertiesUncacheable[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_RESOURCE, 0};
|
||||||
|
const cl_mem_properties_intel propertiesUncacheableInSurfaceState[] = {CL_MEM_FLAGS_INTEL, CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE, 0};
|
||||||
|
|
||||||
using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
|
using clMemLocallyUncachedResourceFixture = Test<HelloWorldFixture<HelloWorldFixtureFactory>>;
|
||||||
|
|
||||||
|
@ -124,6 +125,74 @@ HWTEST_F(clMemLocallyUncachedResourceFixture, GivenAtLeastOneLocallyUncacheableR
|
||||||
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(clMemLocallyUncachedResourceFixture, givenBuffersThatAreUncachedInSurfaceStateWhenStatelessIsProgrammedItIsCached) {
|
||||||
|
cl_int retVal = CL_SUCCESS;
|
||||||
|
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
|
||||||
|
auto bufferCacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||||
|
auto pBufferCacheable1 = clUniquePtr(castToObject<Buffer>(bufferCacheable1));
|
||||||
|
auto bufferCacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesCacheable, n * sizeof(float), nullptr, nullptr);
|
||||||
|
auto pBufferCacheable2 = clUniquePtr(castToObject<Buffer>(bufferCacheable2));
|
||||||
|
|
||||||
|
auto bufferUncacheable1 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
|
||||||
|
auto pBufferUncacheable1 = clUniquePtr(castToObject<Buffer>(bufferUncacheable1));
|
||||||
|
auto bufferUncacheable2 = clCreateBufferWithPropertiesINTEL(context, propertiesUncacheableInSurfaceState, n * sizeof(float), nullptr, nullptr);
|
||||||
|
auto pBufferUncacheable2 = clUniquePtr(castToObject<Buffer>(bufferUncacheable2));
|
||||||
|
|
||||||
|
auto mocsCacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||||
|
auto mocsUncacheable = kernel->getDevice().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||||
|
|
||||||
|
EXPECT_TRUE(kernel->isPatched());
|
||||||
|
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferUncacheable1);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||||
|
|
||||||
|
EXPECT_TRUE(kernel->isPatched());
|
||||||
|
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferUncacheable2);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsUncacheable, argMocs<FamilyType>(*kernel, 0));
|
||||||
|
|
||||||
|
EXPECT_TRUE(kernel->isPatched());
|
||||||
|
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 0, sizeof(cl_mem), &bufferCacheable1);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 0));
|
||||||
|
|
||||||
|
EXPECT_TRUE(kernel->isPatched());
|
||||||
|
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
|
|
||||||
|
retVal = clSetKernelArg(kernel.get(), 1, sizeof(cl_mem), &bufferCacheable2);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, argMocs<FamilyType>(*kernel, 1));
|
||||||
|
|
||||||
|
EXPECT_TRUE(kernel->isPatched());
|
||||||
|
retVal = clEnqueueNDRangeKernel(pCmdQ, kernel.get(), 1, nullptr, globalWorkSize, localWorkSize, 0, nullptr, nullptr);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(mocsCacheable, cmdQueueMocs<FamilyType>(pCmdQ));
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
|
HWTEST_F(clMemLocallyUncachedResourceFixture, WhenUnsettingUncacheableResourceFromKernelThanKernelContinuesToCorrectlySetMocs) {
|
||||||
cl_int retVal = CL_SUCCESS;
|
cl_int retVal = CL_SUCCESS;
|
||||||
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
std::unique_ptr<Kernel> kernel(Kernel::create(pProgram, *pProgram->getKernelInfo("CopyBuffer"), &retVal));
|
||||||
|
|
|
@ -29,7 +29,7 @@ TEST(MemoryPropertiesParser, givenValidPropertiesWhenParsingMemoryPropertiesThen
|
||||||
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR |
|
CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR |
|
||||||
CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS,
|
CL_MEM_USE_HOST_PTR | CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS,
|
||||||
CL_MEM_FLAGS_INTEL,
|
CL_MEM_FLAGS_INTEL,
|
||||||
CL_MEM_LOCALLY_UNCACHED_RESOURCE,
|
CL_MEM_LOCALLY_UNCACHED_RESOURCE | CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE,
|
||||||
0};
|
0};
|
||||||
|
|
||||||
MemoryProperties propertiesStruct;
|
MemoryProperties propertiesStruct;
|
||||||
|
|
|
@ -56,6 +56,10 @@ TEST(MemoryPropertiesFlags, givenValidPropertiesWhenCreateMemoryPropertiesFlagsT
|
||||||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
||||||
EXPECT_TRUE(properties.flags.locallyUncachedResource);
|
EXPECT_TRUE(properties.flags.locallyUncachedResource);
|
||||||
|
|
||||||
|
memoryProperties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||||
|
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(memoryProperties);
|
||||||
|
EXPECT_TRUE(properties.flags.locallyUncachedInSurfaceState);
|
||||||
|
|
||||||
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL);
|
properties = MemoryPropertiesFlagsParser::createMemoryPropertiesFlags(CL_MEM_FORCE_SHARED_PHYSICAL_MEMORY_INTEL);
|
||||||
EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory);
|
EXPECT_TRUE(properties.flags.forceSharedPhysicalMemory);
|
||||||
}
|
}
|
||||||
|
|
|
@ -79,6 +79,10 @@ TEST(MemObjHelper, givenValidPropertiesWhenValidatingMemoryPropertiesThenTrueIsR
|
||||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||||
|
|
||||||
|
properties.flags_intel = CL_MEM_LOCALLY_UNCACHED_SURFACE_STATE_RESOURCE;
|
||||||
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||||
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||||
|
|
||||||
properties.flags = 0;
|
properties.flags = 0;
|
||||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForBuffer(properties));
|
||||||
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
EXPECT_TRUE(MemObjHelper::validateMemoryPropertiesForImage(properties, nullptr));
|
||||||
|
|
Loading…
Reference in New Issue