From 198762baa8966f835b616dfd3b4c79ed5edcd824 Mon Sep 17 00:00:00 2001
From: Lukasz Jobczyk
Date: Thu, 26 Jun 2025 12:10:18 +0000
Subject: [PATCH] performance: Store pattern allocations on reset and sync

Related-To: NEO-9729

Signed-off-by: Lukasz Jobczyk
---
Review notes (free-form area below the "---" marker, not part of the commit
message):
- The one-record-per-line structure of this patch was restored from a copy in
  which it had been collapsed. Indentation of the hunk bodies is assumed to be
  4 spaces per level -- TODO confirm against the tree before applying.
- Text between '<' and '>' appears to be missing from this copy (author
  e-mail, template parameter/argument lists, the static_cast target type, the
  std::numeric_limits type argument). It was not guessed; restore it from the
  original commit.
- The cmdlist_hw_immediate.h hunk has lost its body lines (including its one
  added line); what survives is kept verbatim on the "@@" line below.
- The last hunk shows one context line fewer than its "@@" header declares;
  presumably a trailing blank line -- verify.
- reset() and hostSynchronize() add the same "store pattern allocations for
  reuse, then clear" loop; a shared helper may be worth considering.

 level_zero/core/source/cmdlist/cmdlist_hw.inl |  5 +++
 .../source/cmdlist/cmdlist_hw_immediate.h     |  1 +
 .../source/cmdlist/cmdlist_hw_immediate.inl   |  5 +++
 .../cmdlist/test_in_order_cmdlist_2.cpp       | 40 +++++++++++++++++++
 4 files changed, 51 insertions(+)

diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl
index adc89f93c9..8158bbd6ec 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl
@@ -112,6 +112,11 @@ void CommandListCoreFamily::postInitComputeSetup() {
 
 template
 ze_result_t CommandListCoreFamily::reset() {
+    for (auto &patternAlloc : this->patternAllocations) {
+        device->storeReusableAllocation(*patternAlloc);
+    }
+    this->patternAllocations.clear();
+
     removeDeallocationContainerData();
     removeHostPtrAllocations();
     removeMemoryPrefetchAllocations();
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h
index 6604e3075c..09f0f88813 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h
@@ -57,6 +57,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::*)(NEO::LinearStream &, size_t, bool, bool, NEO::AppendOperations, bool);
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
index da7e8ef923..a9c1cd1f73 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl
@@ -1197,6 +1197,11 @@ ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint6
         if (inOrderExecInfo) {
             inOrderExecInfo->releaseNotUsedTempTimestampNodes(false);
         }
+
+        for (auto &patternAlloc : this->patternAllocations) {
+            this->device->storeReusableAllocation(*patternAlloc);
+        }
+        this->patternAllocations.clear();
     }
 
     bool hangDetected = status == ZE_RESULT_ERROR_DEVICE_LOST;
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp
index aec886c73d..4e6d74c9bb 100644
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp
@@ -1864,6 +1864,46 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
     EXPECT_FALSE(regularCmdList->latestOperationRequiredNonWalkerInOrderCmdsChaining);
 }
 
+HWTEST2_F(InOrderRegularCmdListTests, givenAppendMemoryFillWhenHostSynchronizeThenStoreFillAllocationsInReusableContainer, IsAtLeastXeCore) {
+    auto immCmdList = createImmCmdList();
+    EXPECT_EQ(immCmdList->patternAllocations.size(), 0u);
+    EXPECT_TRUE(static_cast(immCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    constexpr size_t size = 128 * sizeof(uint32_t);
+    auto data = allocDeviceMem(size);
+    uint64_t pattern = 0u;
+
+    immCmdList->appendMemoryFill(data, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams);
+    EXPECT_EQ(immCmdList->patternAllocations.size(), 1u);
+    EXPECT_TRUE(static_cast(immCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    immCmdList->hostSynchronize(std::numeric_limits::max());
+    EXPECT_EQ(immCmdList->patternAllocations.size(), 0u);
+    EXPECT_FALSE(static_cast(immCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    context->freeMem(data);
+}
+
+HWTEST2_F(InOrderRegularCmdListTests, givenAppendMemoryFillWhenResetThenStoreFillAllocationsInReusableContainer, IsAtLeastXeCore) {
+    auto regularCmdList = createRegularCmdList(false);
+    EXPECT_EQ(regularCmdList->patternAllocations.size(), 0u);
+    EXPECT_TRUE(static_cast(regularCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    constexpr size_t size = 128 * sizeof(uint32_t);
+    auto data = allocDeviceMem(size);
+    uint64_t pattern = 0u;
+
+    regularCmdList->appendMemoryFill(data, &pattern, sizeof(pattern), size, nullptr, 0, nullptr, copyParams);
+    EXPECT_EQ(regularCmdList->patternAllocations.size(), 1u);
+    EXPECT_TRUE(static_cast(regularCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    regularCmdList->reset();
+    EXPECT_EQ(regularCmdList->patternAllocations.size(), 0u);
+    EXPECT_FALSE(static_cast(regularCmdList->device)->allocationsForReuse->peekIsEmpty());
+
+    context->freeMem(data);
+}
+
 HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeCore) {
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;