feature: implement pool allocator for gpuTimestampDeviceBuffer

The patch applies to Level Zero.
Only allocations < 2MB will be fetched from the pool.
Allocations are shared and reused within a given device.

Additionally, I added a new debug flag to control the allocator:
EnableTimestampPoolAllocator

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-04-01 19:52:25 +00:00
committed by Compute-Runtime-Automation
parent 8836f6df0b
commit 7ef3880793
18 changed files with 724 additions and 51 deletions

View File

@@ -661,6 +661,7 @@ LogIndirectDetectionKernelDetails = 0
DirectSubmissionRelaxedOrderingCounterHeuristic = -1
DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
ClearStandaloneInOrderTimestampAllocation = -1
EnableTimestampPoolAllocator = -1
PipelinedEuThreadArbitration = -1
ExperimentalUSMAllocationReuseCleaner = -1
DummyPageBackingEnabled = 0

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2024 Intel Corporation
# Copyright (C) 2019-2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -26,6 +26,7 @@ target_sources(neo_shared_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/spinlock_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/tag_allocator_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timer_util_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_pool_allocator_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/vec_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/wait_util_tests.cpp
${CMAKE_CURRENT_SOURCE_DIR}/isa_pool_allocator_tests.cpp

View File

@@ -0,0 +1,166 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_memory_manager.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/test_macros/test.h"
#include "gtest/gtest.h"
// Lets the tests below refer to NEO names without the NEO:: qualifier.
using namespace NEO;
// Fixture alias used by every TEST_F in this file; the tests reach the device through pDevice.
using TimestampPoolAllocatorTest = Test<DeviceFixture>;
namespace {
void verifySharedTimestampAllocation(const SharedTimestampAllocation *sharedAllocation,
size_t expectedOffset,
size_t expectedSize) {
ASSERT_NE(nullptr, sharedAllocation);
EXPECT_NE(nullptr, sharedAllocation->getGraphicsAllocation());
EXPECT_EQ(expectedOffset, sharedAllocation->getOffset());
EXPECT_EQ(expectedSize, sharedAllocation->getSize());
}
} // namespace
// The first request made to the allocator must yield a chunk at offset 0 whose backing
// graphics allocation has the gpuTimestampDeviceBuffer type.
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenNoAllocationsThenCreateNewAllocation) {
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();
    constexpr size_t requestAllocationSize = MemoryConstants::pageSize;

    auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    // A fatal assertion inside the helper only returns from the helper. Propagate it so a
    // null result stops this test instead of being dereferenced below.
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(allocation, 0ul, requestAllocationSize));
    // The helper checks the graphics allocation with a non-fatal EXPECT; make it fatal here
    // because the pointer is dereferenced on the next line.
    ASSERT_NE(nullptr, allocation->getGraphicsAllocation());
    EXPECT_EQ(AllocationType::gpuTimestampDeviceBuffer,
              allocation->getGraphicsAllocation()->getAllocationType());

    timestampAllocator.freeSharedTimestampAllocation(allocation);
}
// Consecutive requests must be served from the same backing graphics allocation at
// increasing offsets, and a request made after the pool has been walked through must be
// served from the same allocation again.
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenAllocationsExistThenReuseAllocation) {
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();
    constexpr size_t requestAllocationSize = MemoryConstants::pageSize;

    auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    // A fatal assertion inside the helper only returns from the helper. Propagate it so a
    // null result stops this test instead of being dereferenced below.
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(allocation, 0ul, requestAllocationSize));
    ASSERT_NE(nullptr, allocation->getGraphicsAllocation());

    auto allocationSize = allocation->getGraphicsAllocation()->getUnderlyingBufferSize();
    auto numOfSharedAllocations = allocationSize / requestAllocationSize;

    // Perform requests until allocation is full
    for (auto i = 1u; i < numOfSharedAllocations; i++) {
        auto tempSharedAllocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
        // Stop on a null chunk; it is dereferenced in the comparison below.
        ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(tempSharedAllocation, requestAllocationSize * i, requestAllocationSize));
        EXPECT_EQ(allocation->getGraphicsAllocation(), tempSharedAllocation->getGraphicsAllocation());
        timestampAllocator.freeSharedTimestampAllocation(tempSharedAllocation);
    }

    // Verify that draining freed chunks is correct and allocation can be reused
    auto newAllocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(newAllocation, requestAllocationSize, requestAllocationSize));
    EXPECT_EQ(allocation->getGraphicsAllocation(), newAllocation->getGraphicsAllocation());

    timestampAllocator.freeSharedTimestampAllocation(newAllocation);
    timestampAllocator.freeSharedTimestampAllocation(allocation);
}
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenPoolIsFullThenCreateNewPool) {
    // This test verifies that:
    // 1. First two allocations of size=poolSize/2 come from the same pool
    // 2. When pool becomes full (after two allocations), a new pool is created
    // 3. Third allocation comes from the new pool (different GraphicsAllocation)
    //
    // Each helper call is wrapped in ASSERT_NO_FATAL_FAILURE because a fatal assertion
    // inside the helper only returns from the helper; without the wrapper a null chunk
    // would be dereferenced by the comparisons below.
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();

    // Request half of pool size to ensure exactly 2 allocations fit in one pool
    size_t requestAllocationSize = timestampAllocator.getDefaultPoolSize() / 2;

    // First allocation - should come from first pool
    auto allocation1 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(allocation1, 0, requestAllocationSize));

    // Second allocation - should come from first pool but with offset
    auto allocation2 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(allocation2, requestAllocationSize, requestAllocationSize));
    EXPECT_EQ(allocation1->getGraphicsAllocation(), allocation2->getGraphicsAllocation());

    // Third allocation - should create new pool because first one is full
    auto allocation3 = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    ASSERT_NO_FATAL_FAILURE(verifySharedTimestampAllocation(allocation3, 0, requestAllocationSize));
    EXPECT_NE(allocation1->getGraphicsAllocation(), allocation3->getGraphicsAllocation());

    timestampAllocator.freeSharedTimestampAllocation(allocation1);
    timestampAllocator.freeSharedTimestampAllocation(allocation2);
    timestampAllocator.freeSharedTimestampAllocation(allocation3);
}
// A 3 MB request is expected to be rejected: the allocator must hand back nullptr.
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenRequestExceedsMaxSizeThenReturnNull) {
    constexpr size_t oversizedRequest = 3 * MemoryConstants::megaByte; // Larger than maxAllocationSize
    auto &poolAllocator = pDevice->getDeviceTimestampPoolAllocator();

    auto rejected = poolAllocator.requestGraphicsAllocationForTimestamp(oversizedRequest);

    EXPECT_EQ(nullptr, rejected);
}
// Exercises isEnabled() for combinations of the EnableTimestampPoolAllocator debug flag and
// the product helper's 2MB local-memory-alignment answer:
//   flag  0, alignment on  -> disabled
//   flag -1, alignment off -> disabled
//   flag  1, alignment off -> enabled
//   flag -1, alignment on  -> enabled
TEST_F(TimestampPoolAllocatorTest, whenCheckingIsEnabledWithDifferentSettingsThenReturnsExpectedValue) {
    // The mock is handed to productHelper through reset(); the raw pointer is kept only to
    // change the mocked result between cases.
    auto mockProductHelper = new MockProductHelper;
    pDevice->getRootDeviceEnvironmentRef().productHelper.reset(mockProductHelper);
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();

    // The debug flag is global state. Remember its incoming value and put it back at the end
    // rather than relying on the last case happening to set the default.
    const auto initialFlagValue = debugManager.flags.EnableTimestampPoolAllocator.get();

    {
        debugManager.flags.EnableTimestampPoolAllocator.set(0);
        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
        EXPECT_FALSE(timestampAllocator.isEnabled());
    }
    {
        debugManager.flags.EnableTimestampPoolAllocator.set(-1);
        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
        EXPECT_FALSE(timestampAllocator.isEnabled());
    }
    {
        debugManager.flags.EnableTimestampPoolAllocator.set(1);
        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = false;
        EXPECT_TRUE(timestampAllocator.isEnabled());
    }
    {
        debugManager.flags.EnableTimestampPoolAllocator.set(-1);
        mockProductHelper->is2MBLocalMemAlignmentEnabledResult = true;
        EXPECT_TRUE(timestampAllocator.isEnabled());
    }

    // Only non-fatal EXPECT_* checks are used above, so this line is always reached.
    debugManager.flags.EnableTimestampPoolAllocator.set(initialFlagValue);
}
// The graphics allocation backing a pool chunk must have a size that is a multiple of 2MB.
TEST_F(TimestampPoolAllocatorTest, givenTimestampPoolAllocatorWhenPoolSizeAlignmentRequestedThenReturnsAlignedSize) {
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();
    constexpr size_t requestAllocationSize = MemoryConstants::pageSize;

    auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(requestAllocationSize);
    ASSERT_NE(nullptr, allocation);
    // Guard the dereference below: fail the test cleanly instead of crashing the test binary
    // if the chunk has no backing graphics allocation.
    ASSERT_NE(nullptr, allocation->getGraphicsAllocation());

    auto allocationSize = allocation->getGraphicsAllocation()->getUnderlyingBufferSize();
    EXPECT_EQ(0u, allocationSize % MemoryConstants::pageSize2M);

    timestampAllocator.freeSharedTimestampAllocation(allocation);
}
// With the mock memory manager configured to fail its primary allocation, a request to
// the timestamp pool allocator is expected to return nullptr.
TEST_F(TimestampPoolAllocatorTest, givenFailingMemoryManagerWhenRequestingAllocationThenReturnNull) {
    auto &timestampAllocator = pDevice->getDeviceTimestampPoolAllocator();

    // Switch the device's memory manager (treated as a MockMemoryManager) into failure mode.
    auto *failingMemoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
    failingMemoryManager->isMockHostMemoryManager = true;
    failingMemoryManager->forceFailureInPrimaryAllocation = true;

    const size_t halfPoolSize = timestampAllocator.getDefaultPoolSize() / 2;
    auto allocation = timestampAllocator.requestGraphicsAllocationForTimestamp(halfPoolSize);
    EXPECT_EQ(nullptr, allocation);

    // Cleanup path for the unexpected case where a chunk was handed out anyway.
    if (allocation != nullptr) {
        timestampAllocator.freeSharedTimestampAllocation(allocation);
    }
}