diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp index 3fbeb55d6e..25411d7fdb 100644 --- a/opencl/source/context/context.cpp +++ b/opencl/source/context/context.cpp @@ -475,10 +475,14 @@ bool Context::isSingleDeviceContext() { void Context::BufferPoolAllocator::initAggregatedSmallBuffers(Context *context) { static constexpr cl_mem_flags flags{}; [[maybe_unused]] cl_int errcodeRet{}; + Buffer::AdditionalBufferCreateArgs bufferCreateArgs{}; + bufferCreateArgs.doNotProvidePerformanceHints = true; + bufferCreateArgs.makeAllocationLockable = true; this->mainStorage = Buffer::create(context, flags, BufferPoolAllocator::aggregatedSmallBuffersPoolSize, nullptr, + bufferCreateArgs, errcodeRet); if (this->mainStorage) { this->chunkAllocator.reset(new HeapAllocator(BufferPoolAllocator::startingOffset, diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index 942b5d3401..778ae2dd28 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -50,6 +50,7 @@ class Context : public BaseObject<_cl_context> { static constexpr auto startingOffset = chunkAlignment; static_assert(aggregatedSmallBuffersPoolSize > smallBufferThreshold, "Largest allowed buffer needs to fit in pool"); + Buffer *allocateBufferFromPool(const MemoryProperties &memoryProperties, cl_mem_flags flags, cl_mem_flags_intel flagsIntel, diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index c9828c4352..fc3e7af2a6 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -170,9 +170,20 @@ Buffer *Buffer::create(Context *context, cl_mem_flags flags, size_t size, void *hostPtr, + AdditionalBufferCreateArgs &bufferCreateArgs, cl_int &errcodeRet) { return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), - flags, 0, size, hostPtr, errcodeRet); + flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet); +} + +Buffer *Buffer::create(Context *context, + cl_mem_flags flags, + size_t size, + void *hostPtr, + cl_int &errcodeRet) { + AdditionalBufferCreateArgs bufferCreateArgs{}; + return create(context, ClMemoryPropertiesHelper::createMemoryProperties(flags, 0, 0, &context->getDevice(0)->getDevice()), + flags, 0, size, hostPtr, bufferCreateArgs, errcodeRet); } bool inline copyHostPointer(Buffer *buffer, @@ -233,6 +244,18 @@ Buffer *Buffer::create(Context *context, size_t size, void *hostPtr, cl_int &errcodeRet) { + AdditionalBufferCreateArgs bufferCreateArgs{}; + return create(context, memoryProperties, flags, flagsIntel, size, hostPtr, bufferCreateArgs, errcodeRet); +} + +Buffer *Buffer::create(Context *context, + const MemoryProperties &memoryProperties, + cl_mem_flags flags, + cl_mem_flags_intel flagsIntel, + size_t size, + void *hostPtr, + AdditionalBufferCreateArgs &bufferCreateArgs, + cl_int &errcodeRet) { errcodeRet = CL_SUCCESS; Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator(); @@ -355,7 +378,7 @@ Buffer *Buffer::create(Context *context, allocationInfo.allocateMemory = false; } - if (hostPtr && context->isProvidingPerformanceHints()) { + if (!bufferCreateArgs.doNotProvidePerformanceHints && hostPtr && context->isProvidingPerformanceHints()) { if (allocationInfo.zeroCopyAllowed) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_MEETS_ALIGNMENT_RESTRICTIONS, hostPtr, size); } else { @@ -367,7 +390,7 @@ Buffer *Buffer::create(Context *context, allocationInfo.zeroCopyAllowed = false; } - if (allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) { + if (!bufferCreateArgs.doNotProvidePerformanceHints && allocationInfo.allocateMemory && context->isProvidingPerformanceHints()) { context->providePerformanceHint(CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL, CL_BUFFER_NEEDS_ALLOCATE_MEMORY); } @@ -381,6 +404,7 @@ Buffer *Buffer::create(Context *context, *hwInfo, context->getDeviceBitfieldForAllocation(rootDeviceIndex), context->isSingleDeviceContext()); allocProperties.flags.crossRootDeviceAccess = context->getRootDeviceIndices().size() > 1; allocProperties.flags.preferCompressed = compressionEnabled; + allocProperties.makeDeviceBufferLockable = bufferCreateArgs.makeAllocationLockable; if (allocationCpuPtr) { allocationInfo.memory = memoryManager->createGraphicsAllocationFromExistingStorage(allocProperties, allocationCpuPtr, multiGraphicsAllocation); diff --git a/opencl/source/mem_obj/buffer.h b/opencl/source/mem_obj/buffer.h index c397807da5..1f9c19429c 100644 --- a/opencl/source/mem_obj/buffer.h +++ b/opencl/source/mem_obj/buffer.h @@ -57,6 +57,10 @@ extern ValidateInputAndCreateBufferFunc validateInputAndCreateBuffer; class Buffer : public MemObj { public: + struct AdditionalBufferCreateArgs { + bool doNotProvidePerformanceHints; + bool makeAllocationLockable; + }; constexpr static size_t maxBufferSizeForReadWriteOnCpu = 10 * MB; constexpr static size_t maxBufferSizeForCopyOnCpu = 64 * KB; constexpr static cl_ulong maskMagic = 0xFFFFFFFFFFFFFFFFLL; @@ -80,6 +84,13 @@ class Buffer : public MemObj { void *hostPtr, cl_int &errcodeRet); + static Buffer *create(Context *context, + cl_mem_flags flags, + size_t size, + void *hostPtr, + AdditionalBufferCreateArgs &bufferCreateArgs, + cl_int &errcodeRet); + static Buffer *create(Context *context, const MemoryProperties &properties, cl_mem_flags flags, @@ -88,6 +99,15 @@ class Buffer : public MemObj { void *hostPtr, cl_int &errcodeRet); + static Buffer *create(Context *context, + const MemoryProperties &properties, + cl_mem_flags flags, + cl_mem_flags_intel flagsIntel, + size_t size, + void *hostPtr, + AdditionalBufferCreateArgs &bufferCreateArgs, + cl_int &errcodeRet); + static Buffer *createSharedBuffer(Context *context, cl_mem_flags flags, SharingHandler *sharingHandler, diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp index 6302935e73..5cada22c40 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.cpp @@ -91,19 +91,23 @@ TEST_P(PerformanceHintBufferTest, GivenHostPtrAndSizeAlignmentsWhenBufferIsCreat flags |= CL_MEM_FORCE_HOST_MEMORY_INTEL; } + Buffer::AdditionalBufferCreateArgs bufferCreateArgs{}; + bufferCreateArgs.doNotProvidePerformanceHints = !providePerformanceHint; + buffer = Buffer::create( context, flags, sizeForBuffer, (void *)addressForBuffer, + bufferCreateArgs, retVal); EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_NE(nullptr, buffer); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_DOESNT_MEET_ALIGNMENT_RESTRICTIONS], addressForBuffer, sizeForBuffer, MemoryConstants::pageSize, MemoryConstants::pageSize); - EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); + EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData)); snprintf(expectedHint, DriverDiagnostics::maxHintStringSize, DriverDiagnostics::hintFormat[CL_BUFFER_NEEDS_ALLOCATE_MEMORY], 0); - EXPECT_EQ(!(alignedSize && alignedAddress), containsHint(expectedHint, userData)); + EXPECT_EQ(providePerformanceHint && !(alignedSize && alignedAddress), containsHint(expectedHint, userData)); } TEST_P(PerformanceHintCommandQueueTest, GivenProfilingFlagAndPreemptionFlagWhenCommandQueueIsCreatingThenContextProvidesProperHints) { @@ -916,6 +920,7 @@ INSTANTIATE_TEST_CASE_P( DriverDiagnosticsTests, PerformanceHintBufferTest, testing::Combine( + ::testing::Bool(), ::testing::Bool(), ::testing::Bool())); diff --git a/opencl/test/unit_test/context/driver_diagnostics_tests.h b/opencl/test/unit_test/context/driver_diagnostics_tests.h index f20208022a..ea3f3b7291 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_tests.h +++ b/opencl/test/unit_test/context/driver_diagnostics_tests.h @@ -84,11 +84,11 @@ struct PerformanceHintTest : public DriverDiagnosticsTest, }; struct PerformanceHintBufferTest : public PerformanceHintTest, - public ::testing::WithParamInterface> { + public ::testing::WithParamInterface> { void SetUp() override { PerformanceHintTest::SetUp(); - std::tie(alignedAddress, alignedSize) = GetParam(); + std::tie(alignedAddress, alignedSize, providePerformanceHint) = GetParam(); address = alignedMalloc(2 * MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize); } @@ -99,6 +99,7 @@ struct PerformanceHintBufferTest : public PerformanceHintTest, } bool alignedSize = false; bool alignedAddress = false; + bool providePerformanceHint = false; void *address = nullptr; Buffer *buffer = nullptr; }; diff --git a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp index 0ebed29e04..182c244577 100644 --- a/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_pool_alloc_tests.cpp @@ -19,24 +19,12 @@ namespace Ult { using PoolAllocator = Context::BufferPoolAllocator; using MockBufferPoolAllocator = MockContext::MockBufferPoolAllocator; -template +template class AggregatedSmallBuffersTestTemplate : public ::testing::Test { void SetUp() override { - this->setUpImpl(); - } - - void setUpImpl() { - DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag); - this->deviceFactory = std::make_unique(1, 0); - this->device = deviceFactory->rootDevices[0]; - this->mockMemoryManager = static_cast(device->getMemoryManager()); - this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true; - this->setAllocationToFail(failMainStorageAllocation); - cl_device_id devices[] = {device}; - this->context.reset(Context::create(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal)); - ASSERT_EQ(retVal, CL_SUCCESS); - this->setAllocationToFail(false); - this->poolAllocator = static_cast(&context->smallBufferPoolAllocator); + if constexpr (runSetup) { + this->setUpImpl(); + } } void TearDown() override { @@ -62,6 +50,20 @@ class AggregatedSmallBuffersTestTemplate : public ::testing::Test { cl_int retVal = CL_SUCCESS; DebugManagerStateRestore restore; + + void setUpImpl() { + DebugManager.flags.ExperimentalSmallBufferPoolAllocator.set(poolBufferFlag); + this->deviceFactory = std::make_unique(1, 0); + this->device = deviceFactory->rootDevices[0]; + this->mockMemoryManager = static_cast(device->getMemoryManager()); + this->mockMemoryManager->localMemorySupported[mockRootDeviceIndex] = true; + this->setAllocationToFail(failMainStorageAllocation); + cl_device_id devices[] = {device}; + this->context.reset(Context::create(nullptr, ClDeviceVector(devices, 1), nullptr, nullptr, retVal)); + ASSERT_EQ(retVal, CL_SUCCESS); + this->setAllocationToFail(false); + this->poolAllocator = static_cast(&context->smallBufferPoolAllocator); + } }; using aggregatedSmallBuffersDefaultTest = AggregatedSmallBuffersTestTemplate<-1>; @@ -84,6 +86,13 @@ TEST_F(aggregatedSmallBuffersDisabledTest, givenAggregatedSmallBuffersDisabledWh using aggregatedSmallBuffersEnabledTest = AggregatedSmallBuffersTestTemplate<1>; +TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledWhenAllocatingMainStorageThenMakeDeviceBufferLockable) { + ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled()); + ASSERT_NE(poolAllocator->mainStorage, nullptr); + ASSERT_NE(mockMemoryManager->lastAllocationProperties, nullptr); + EXPECT_TRUE(mockMemoryManager->lastAllocationProperties->makeDeviceBufferLockable); +} + TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndSizeLargerThanThresholdWhenBufferCreateCalledThenDoNotUsePool) { ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled()); ASSERT_NE(poolAllocator->mainStorage, nullptr); @@ -235,9 +244,9 @@ TEST_F(aggregatedSmallBuffersEnabledTest, givenAggregatedSmallBuffersEnabledAndS } } -using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, true>; +using aggregatedSmallBuffersEnabledTestFailPoolInit = AggregatedSmallBuffersTestTemplate<1, true>; -TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) { +TEST_F(aggregatedSmallBuffersEnabledTestFailPoolInit, givenAggregatedSmallBuffersEnabledAndSizeEqualToThresholdWhenBufferCreateCalledButPoolCreateFailedThenDoNotUsePool) { ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled()); ASSERT_EQ(poolAllocator->mainStorage, nullptr); std::unique_ptr buffer(Buffer::create(context.get(), flags, size, hostPtr, retVal)); @@ -247,6 +256,19 @@ TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffe EXPECT_EQ(poolAllocator->mainStorage, nullptr); } +using aggregatedSmallBuffersEnabledTestDoNotRunSetup = AggregatedSmallBuffersTestTemplate<1, false, false>; + +TEST_F(aggregatedSmallBuffersEnabledTestDoNotRunSetup, givenAggregatedSmallBuffersEnabledWhenPoolInitializedThenPerformanceHintsNotProvided) { + testing::internal::CaptureStdout(); + DebugManager.flags.PrintDriverDiagnostics.set(1); + setUpImpl(); + ASSERT_TRUE(poolAllocator->isAggregatedSmallBuffersEnabled()); + ASSERT_NE(poolAllocator->mainStorage, nullptr); + ASSERT_NE(context->driverDiagnostics, nullptr); + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_EQ(0u, output.size()); +} + template class AggregatedSmallBuffersApiTestTemplate : public ::testing::Test { void SetUp() override { diff --git a/shared/source/memory_manager/allocation_properties.h b/shared/source/memory_manager/allocation_properties.h index b41e90ca9a..d507db9e12 100644 --- a/shared/source/memory_manager/allocation_properties.h +++ b/shared/source/memory_manager/allocation_properties.h @@ -48,6 +48,7 @@ struct AllocationProperties { OsContext *osContext = nullptr; bool useMmapObject = true; uint32_t cacheRegion = 0; + bool makeDeviceBufferLockable = false; AllocationProperties(uint32_t rootDeviceIndex, size_t size, AllocationType allocationType, DeviceBitfield subDevicesBitfieldParam) diff --git a/shared/source/memory_manager/definitions/storage_info.cpp b/shared/source/memory_manager/definitions/storage_info.cpp index 29f2555563..4353180870 100644 --- a/shared/source/memory_manager/definitions/storage_info.cpp +++ b/shared/source/memory_manager/definitions/storage_info.cpp @@ -35,7 +35,7 @@ StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationPrope StorageInfo storageInfo{preferredTile, allTilesValue}; storageInfo.subDeviceBitfield = properties.subDevicesBitfield; - storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType); + storageInfo.isLockable = GraphicsAllocation::isLockable(properties.allocationType) || (properties.makeDeviceBufferLockable && properties.allocationType == AllocationType::BUFFER); storageInfo.cpuVisibleSegment = GraphicsAllocation::isCpuAccessRequired(properties.allocationType); AppResourceHelper::copyResourceTagStr(storageInfo.resourceTag, properties.allocationType, diff --git a/shared/test/common/mocks/mock_memory_manager.cpp b/shared/test/common/mocks/mock_memory_manager.cpp index 325033da80..e993c666b5 100644 --- a/shared/test/common/mocks/mock_memory_manager.cpp +++ b/shared/test/common/mocks/mock_memory_manager.cpp @@ -56,6 +56,7 @@ GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(cons } GraphicsAllocation *MockMemoryManager::allocateGraphicsMemoryWithProperties(const AllocationProperties &properties, const void *ptr) { + lastAllocationProperties.reset(new AllocationProperties(properties)); if (returnFakeAllocation) { return new GraphicsAllocation(properties.rootDeviceIndex, properties.allocationType, reinterpret_cast(dummyAddress), reinterpret_cast(ptr), properties.size, 0, MemoryPool::System4KBPages, maxOsContextCount); } diff --git a/shared/test/common/mocks/mock_memory_manager.h b/shared/test/common/mocks/mock_memory_manager.h index 35358073ce..420891804b 100644 --- a/shared/test/common/mocks/mock_memory_manager.h +++ b/shared/test/common/mocks/mock_memory_manager.h @@ -261,6 +261,7 @@ class MockMemoryManager : public MemoryManagerCreate { MemAdviseFlags memAdviseFlags{}; MemoryManager::AllocationStatus populateOsHandlesResult = MemoryManager::AllocationStatus::Success; GraphicsAllocation *allocateGraphicsMemoryForNonSvmHostPtrResult = nullptr; + std::unique_ptr lastAllocationProperties = nullptr; }; class MockAllocSysMemAgnosticMemoryManager : public OsAgnosticMemoryManager { diff --git a/shared/test/unit_test/memory_manager/storage_info_tests.cpp b/shared/test/unit_test/memory_manager/storage_info_tests.cpp index 9d7c3b7855..06b1c45572 100644 --- a/shared/test/unit_test/memory_manager/storage_info_tests.cpp +++ b/shared/test/unit_test/memory_manager/storage_info_tests.cpp @@ -472,6 +472,25 @@ TEST_F(MultiDeviceStorageInfoTest, givenGraphicsAllocationThatIsLockableWhenCrea } } +TEST_F(MultiDeviceStorageInfoTest, givenAllocationTypeBufferWhenCreatingStorageInfoThenIsLockableFlagIsSetCorrectly) { + AllocationProperties properties{mockRootDeviceIndex, false, 1u, AllocationType::BUFFER, false, singleTileMask}; + { + properties.makeDeviceBufferLockable = false; + auto storageInfo = memoryManager->createStorageInfoFromProperties(properties); + EXPECT_FALSE(storageInfo.isLockable); + } + { + properties.makeDeviceBufferLockable = true; + auto storageInfo = memoryManager->createStorageInfoFromProperties(properties); + EXPECT_TRUE(storageInfo.isLockable); + } + { + properties.allocationType = AllocationType::IMAGE; + auto storageInfo = memoryManager->createStorageInfoFromProperties(properties); + EXPECT_FALSE(storageInfo.isLockable); + } +} + TEST_F(MultiDeviceStorageInfoTest, givenGpuTimestampAllocationWhenUsingSingleTileDeviceThenExpectRegularAllocationStorageInfo) { AllocationProperties properties{mockRootDeviceIndex, false,