diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp
index 3fbeb55d6e..25411d7fdb 100644
--- a/opencl/source/context/context.cpp
+++ b/opencl/source/context/context.cpp
@@ -475,10 +475,14 @@ bool Context::isSingleDeviceContext() {
void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) {
static constexpr cl_mem_flags flags{};
[[maybe_unused]] cl_int errcodeRet{};
+ Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
+ bufferCreateArgs.doNotProvidePerformanceHints = true;
+ bufferCreateArgs.makeAllocationLockable = true;
this->mainStorage = Buffer::create(context,
flags,
BufferPoolAllocator::aggregatedSmallBuffersPoolSize,
nullptr,
+ bufferCreateArgs,
errcodeRet);
if (this->mainStorage) {
this->chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset,
diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h
index 942b5d3401..778ae2dd28 100644
--- a/opencl/source/context/context.h
+++ b/opencl/source/context/context.h
@@ -50,6 +50,7 @@ class Context : public BaseObject<_cl_context> {
static constexpr auto startingOffset = chunkAlignment;
static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool");
+
Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties,
cl_mem_flags flags,
cl_mem_flags_intel flagsIntel,
diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp
index c9828c4352..fc3e7af2a6 100644
--- a/opencl/source/mem_obj/buffer.cpp
+++ b/opencl/source/mem_obj/buffer.cpp
@@ -170,9 +170,20 @@ Buffer *Buffer::create(Context *context,
cl_mem_flags flags,
size_t size,
void *hostPtr,
+ AdditionalBufferCreateArgs &bufferCreateArgs,
cl_int &errcodeRet) {
return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()),
- flags, 0, size, hostPtr, errcodeRet);
+ flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet);
+}
+
+Buffer *Buffer::create(Context *context,
+ cl_mem_flags flags,
+ size_t size,
+ void *hostPtr,
+ cl_int &errcodeRet) {
+ AdditionalBufferCreateArgs defaultCreateArgs{}; // convenience overload: value-initialized (all-false) additional arguments
+ const auto memoryProperties = ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()); // derived from flags and the context's first device
+ return create(context, memoryProperties, flags, 0, size, hostPtr, defaultCreateArgs, errcodeRet); // forwards to the full overload with flagsIntel == 0
}
bool inline copyHostPointer(Buffer *buffer,
@@ -233,6 +244,18 @@ Buffer *Buffer::create(Context *context,
size_t size,
void *hostPtr,
cl_int &errcodeRet) {
+ AdditionalBufferCreateArgs bufferCreateArgs{};
+ return create(context, memoryProperties, flags, flagsIntel, size, hostPtr, bufferCreateArgs, errcodeRet);
+}
+
+Buffer *Buffer::create(Context *context,
+ const MemoryProperties &memoryProperties,
+ cl_mem_flags flags,
+ cl_mem_flags_intel flagsIntel,
+ size_t size,
+ void *hostPtr,
+ AdditionalBufferCreateArgs &bufferCreateArgs,
+ cl_int &errcodeRet) {
errcodeRet = CL_SUCCESS;
Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator();
@@ -355,7 +378,7 @@ Buffer *Buffer::create(Context *context,
allocationInfo.allocateMemory = false;
}
- if (hostPtr && context->isProvidingPerformanceHints()) {
+ if (!bufferCreateArgs.doNotProvidePerformanceHints && hostPtr && context->isProvidingPerformanceHints()) {
if (allocationInfo.zeroCopyAllowed) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size);
} else {
@@ -367,7 +390,7 @@ Buffer *Buffer::create(Context *context,
allocationInfo.zeroCopyAllowed = false;
}
- if (allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
+ if (!bufferCreateArgs.doNotProvidePerformanceHints && allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) {
context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY);
}
@@ -381,6 +404,7 @@ Buffer *Buffer::create(Context *context,
*hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext());
allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1;
allocProperties.flags.preferCompressed = compressionEnabled;
+ allocProperties.makeDeviceBufferLockable = bufferCreateArgs.makeAllocationLockable;
if (allocationCpuPtr) {
allocationInfo.memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, allocationCpuPtr, multiGraphicsAllocation);
diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h
index c397807da5..1f9c19429c 100644
--- a/opencl/source/mem_obj/buffer.h
+++ b/opencl/source/mem_obj/buffer.h
@@ -57,6 +57,10 @@ extern ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer;
class Buffer : public MemObj {
public:
+ struct AdditionalBufferCreateArgs { // optional knobs for Buffer::create; every member defaults to false
+ bool doNotProvidePerformanceHints = false; // when true, Buffer::create skips its performance-hint reporting
+ bool makeAllocationLockable = false; // when true, copied into AllocationProperties::makeDeviceBufferLockable
+ };
constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB;
constexpr static size_t maxBufferSizeForCopyOnCpu = 64 * KB;
constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL;
@@ -80,6 +84,13 @@ class Buffer : public MemObj {
void *hostPtr,
cl_int &errcodeRet);
+ static Buffer *create(Context *context, // overload taking additional create arguments; memory properties are derived from flags
+ cl_mem_flags flags,
+ size_t size,
+ void *hostPtr,
+ AdditionalBufferCreateArgs &bufferCreateArgs, // NOTE(review): non-const reference, but only forwarded in the visible definition — consider const&
+ cl_int &errcodeRet);
+
+
static Buffer *create(Context *context,
const MemoryProperties &properties,
cl_mem_flags flags,
@@ -88,6 +99,15 @@ class Buffer : public MemObj {
void *hostPtr,
cl_int &errcodeRet);
+ static Buffer *create(Context *context, // full overload; the variants without bufferCreateArgs forward here with value-initialized arguments
+ const MemoryProperties &properties,
+ cl_mem_flags flags,
+ cl_mem_flags_intel flagsIntel,
+ size_t size,
+ void *hostPtr,
+ AdditionalBufferCreateArgs &bufferCreateArgs, // read for doNotProvidePerformanceHints and makeAllocationLockable in the visible parts of the definition
+ cl_int &errcodeRet); // set to CL_SUCCESS at the start of the definition
+
+
static Buffer *createSharedBuffer(Context *context,
cl_mem_flags flags,
SharingHandler *sharingHandler,
diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
index 6302935e73..5cada22c40 100644
--- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
+++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp
@@ -91,19 +91,23 @@ TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreat
flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL;
}
+ Buffer::AdditionalBufferCreateArgs bufferCreateArgs{};
+ bufferCreateArgs.doNotProvidePerformanceHints = !providePerformanceHint;
+
buffer = Buffer::create(
context,
flags,
sizeForBuffer,
(void *)addressForBuffer,
+ bufferCreateArgs,
retVal);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(nullptr, buffer);
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForBuffer, sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize);
- EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
+ EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0);
- EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData));
+ EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData));
}
TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) {
@@ -916,6 +920,7 @@ INSTANTIATE_TEST_CASE_P(
DriverDiagnosticsTests,
PerformanceHintBufferTest,
testing::Combine(
+ ::testing::Bool(),
::testing::Bool(),
::testing::Bool()));
diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h
index f20208022a..ea3f3b7291 100644
--- a/opencl/test/unit_test/context/driver_diagnostics_tests.h
+++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h
@@ -84,11 +84,11 @@ struct PerformanceHintTest : public DriverDiagnosticsTest,
};
struct PerformanceHintBufferTest : public PerformanceHintTest,
- public ::testing::WithParamInterface> {
+ public ::testing::WithParamInterface> {
void SetUp() override {
PerformanceHintTest::SetUp();
- std::tie(alignedAddress, alignedSize) = GetParam();
+ std::tie(alignedAddress, alignedSize, providePerformanceHint) = GetParam();
address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
}
@@ -99,6 +99,7 @@ struct PerformanceHintBufferTest : public PerformanceHintTest,
}
bool alignedSize = false;
bool alignedAddress = false;
+ bool providePerformanceHint = false;
void *address = nullptr;
Buffer *buffer = nullptr;
};
diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp
index 0ebed29e04..182c244577 100644
--- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp
+++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp
@@ -19,24 +19,12 @@ namespace Ult {
using PoolAllocator = Context::BufferPoolAllocator;
using MockBufferPoolAllocator = MockContext::MockBufferPoolAllocator;
-template
+template
class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
void SetUp() override {
- this->setUpImpl();
- }
-
- void setUpImpl() {
- DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag);
- this->deviceFactory = std::make_unique(1, 0);
- this->device = deviceFactory->rootDevices[0];
- this->mockMemoryManager = static_cast(device->getMemoryManager());
- this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
- this->setAllocationToFail(failMainStorageAllocation);
- cl_device_id devices[] = {device};
- this->context.reset(Context::create(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
- ASSERT_EQ(retVal, CL_SUCCESS);
- this->setAllocationToFail(false);
- this->poolAllocator = static_cast(&context->smallBufferPoolAllocator);
+ if constexpr (runSetup) {
+ this->setUpImpl();
+ }
}
void TearDown() override {
@@ -62,6 +50,20 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test {
cl_int retVal = CL_SUCCESS;
DebugManagerStateRestore restore;
+
+ void setUpImpl() { // fixture setup body; run from SetUp() when runSetup is true, or called directly by a test
+ DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag); // apply the fixture's poolBufferFlag before the context exists
+ this->deviceFactory = std::make_unique(1, 0);
+ this->device = deviceFactory->rootDevices[0];
+ this->mockMemoryManager = static_cast(device->getMemoryManager());
+ this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true;
+ this->setAllocationToFail(failMainStorageAllocation); // presumably makes the pool's main storage allocation fail during context creation — confirm
+ cl_device_id devices[] = {device};
+ this->context.reset(Context::create(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal));
+ ASSERT_EQ(retVal, CL_SUCCESS); // a fatal failure here returns from setUpImpl only, not from the caller
+ this->setAllocationToFail(false); // reset the failure switch for allocations made by the test body
+ this->poolAllocator = static_cast(&context->smallBufferPoolAllocator);
+ }
};
using aggregatedSmallBuffersDefaultTest = AggregatedSmallBuffersTestTemplate<-1>;
@@ -84,6 +86,13 @@ TEST_F(aggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh
using aggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>;
+TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) {
+ ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
+ ASSERT_NE(poolAllocator->mainStorage, nullptr);
+ ASSERT_NE(mockMemoryManager->lastAllocationProperties, nullptr); // copy of the properties stored by MockMemoryManager::allocateGraphicsMemoryWithProperties(properties, ptr)
+ EXPECT_TRUE(mockMemoryManager->lastAllocationProperties->makeDeviceBufferLockable); // presumably the pool's main storage was the last such allocation during setup — confirm
+}
+
TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
ASSERT_NE(poolAllocator->mainStorage, nullptr);
@@ -235,9 +244,9 @@ TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS
}
}
-using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, true>;
+using aggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>;
-TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
+TEST_F(aggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) {
ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
ASSERT_EQ(poolAllocator->mainStorage, nullptr);
std::unique_ptr buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal));
@@ -247,6 +256,19 @@ TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe
EXPECT_EQ(poolAllocator->mainStorage, nullptr);
}
+using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, false, false>;
+
+TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledWhenPoolInitializedThenPerformanceHintsNotProvided) {
+ testing::internal::CaptureStdout(); // must be paired with GetCapturedStdout() on every path out of this test
+ DebugManager.flags.PrintDriverDiagnostics.set(1); // set before setUpImpl() creates the context
+ setUpImpl(); // creates the context; pool initialization is expected to have happened by now (asserted below)
+ const std::string output = testing::internal::GetCapturedStdout(); // end the capture before any fatal ASSERT_* can return early and leave it open
+ ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled());
+ ASSERT_NE(poolAllocator->mainStorage, nullptr);
+ ASSERT_NE(context->driverDiagnostics, nullptr); // diagnostics object exists, so an empty capture is not simply due to diagnostics being off
+ EXPECT_EQ(0u, output.size()); // nothing was printed while the context and pool were set up
+}
+
template
class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test {
void SetUp() override {
diff --git a/shared/source/memory_manager/allocation_properties.h b/shared/source/memory_manager/allocation_properties.h
index b41e90ca9a..d507db9e12 100644
--- a/shared/source/memory_manager/allocation_properties.h
+++ b/shared/source/memory_manager/allocation_properties.h
@@ -48,6 +48,7 @@ struct AllocationProperties {
OsContext *osContext = nullptr;
bool useMmapObject = true;
uint32_t cacheRegion = 0;
+ bool makeDeviceBufferLockable = false;
AllocationProperties(uint32_t rootDeviceIndex, size_t size,
AllocationType allocationType, DeviceBitfield subDevicesBitfieldParam)
diff --git a/shared/source/memory_manager/definitions/storage_info.cpp b/shared/source/memory_manager/definitions/storage_info.cpp
index 29f2555563..4353180870 100644
--- a/shared/source/memory_manager/definitions/storage_info.cpp
+++ b/shared/source/memory_manager/definitions/storage_info.cpp
@@ -35,7 +35,7 @@ StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationPrope
StorageInfo storageInfo{preferredTile, allTilesValue};
storageInfo.subDeviceBitfield = properties.subDevicesBitfield;
- storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType);
+ storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType) || (properties.makeDeviceBufferLockable && properties.allocationType == AllocationType::BUFFER);
storageInfo.cpuVisibleSegment = GraphicsAllocation::isCpuAccessRequired(properties.allocationType);
AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, properties.allocationType,
diff --git a/shared/test/common/mocks/mock_memory_manager.cpp b/shared/test/common/mocks/mock_memory_manager.cpp
index 325033da80..e993c666b5 100644
--- a/shared/test/common/mocks/mock_memory_manager.cpp
+++ b/shared/test/common/mocks/mock_memory_manager.cpp
@@ -56,6 +56,7 @@ GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(cons
}
GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) {
+ lastAllocationProperties.reset(new AllocationProperties(properties));
if (returnFakeAllocation) {
return new GraphicsAllocation(properties.rootDeviceIndex, properties.allocationType, reinterpret_cast(dummyAddress), reinterpret_cast(ptr), properties.size, 0, MemoryPool::System4KBPages, maxOsContextCount);
}
diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h
index 35358073ce..420891804b 100644
--- a/shared/test/common/mocks/mock_memory_manager.h
+++ b/shared/test/common/mocks/mock_memory_manager.h
@@ -261,6 +261,7 @@ class MockMemoryManager : public MemoryManagerCreate {
MemAdviseFlags memAdviseFlags{};
MemoryManager::AllocationStatus populateOsHandlesResult = MemoryManager::AllocationStatus::Success;
GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr;
+ std::unique_ptr lastAllocationProperties = nullptr;
};
class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager {
diff --git a/shared/test/unit_test/memory_manager/storage_info_tests.cpp b/shared/test/unit_test/memory_manager/storage_info_tests.cpp
index 9d7c3b7855..06b1c45572 100644
--- a/shared/test/unit_test/memory_manager/storage_info_tests.cpp
+++ b/shared/test/unit_test/memory_manager/storage_info_tests.cpp
@@ -472,6 +472,25 @@ TEST_F(MultiDeviceStorageInfoTest, givenGraphicsAllocationThatIsLockableWhenCrea
}
}
+TEST_F(MultiDeviceStorageInfoTest, givenAllocationTypeBufferWhenCreatingStorageInfoThenIsLockableFlagIsSetCorrectly) {
+ AllocationProperties properties{mockRootDeviceIndex, false, 1u, AllocationType::BUFFER, false, singleTileMask}; // one properties object reused and mutated by the three scopes below
+ {
+ properties.makeDeviceBufferLockable = false; // BUFFER without the override: expected not lockable
+ auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+ EXPECT_FALSE(storageInfo.isLockable);
+ }
+ {
+ properties.makeDeviceBufferLockable = true; // BUFFER with the override: expected lockable
+ auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+ EXPECT_TRUE(storageInfo.isLockable);
+ }
+ {
+ properties.allocationType = AllocationType::IMAGE; // makeDeviceBufferLockable is still true from the previous scope; the override is expected to affect BUFFER only
+ auto storageInfo = memoryManager->createStorageInfoFromProperties(properties);
+ EXPECT_FALSE(storageInfo.isLockable);
+ }
+}
+
TEST_F(MultiDeviceStorageInfoTest, givenGpuTimestampAllocationWhenUsingSingleTileDeviceThenExpectRegularAllocationStorageInfo) {
AllocationProperties properties{mockRootDeviceIndex,
false,