From 00e899734278f5a964cedf90c964360b5f2ecbe9 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 11 May 2023 10:48:40 +0000 Subject: [PATCH] feature: Experimental support of immediate cmd list in-order execution Don't signal sync allocation from Walker in Kernel split path Related-To: LOCI-4332 Signed-off-by: Dunajski, Bartosz --- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 2 +- .../test_cmdlist_append_launch_kernel_3.cpp | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index dd6a9f706d..f775b3067b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -277,7 +277,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->dcFlushSupport // dcFlushEnable }; - if (this->inOrderExecutionEnabled) { + if (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation) { DEBUG_BREAK_IF(isTimestampEvent); dispatchKernelArgs.isTimestampEvent = false; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index c48098189a..668b555d51 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -9,6 +9,7 @@ #include "shared/source/command_container/encode_surface_state.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/bindless_heaps_helper.h" +#include "shared/source/helpers/constants.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/helpers/register_offsets.h" @@ -964,6 +965,43 @@ HWTEST2_F(InOrderCmdListTests, 
givenInOrderModeWhenProgrammingWalkerThenSignalSy
     EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1));
 }
 
+HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
+    using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
+    using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
+
+    auto immCmdList = createImmCmdList<gfxCoreFamily>();
+
+    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
+
+    const size_t ptrBaseSize = 128;
+    const size_t offset = 1; // shifts the cache-line-aligned buffer by one byte; presumably what puts the copy on the kernel-split path - confirm
+    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
+    auto unalignedPtr = ptrOffset(alignedPtr, offset);
+
+    immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 0, nullptr, false);
+
+    GenCmdList cmdList;
+    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed()));
+
+    auto walkerItor = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
+
+    uint32_t walkersFound = 0;
+    while (cmdList.end() != walkerItor) {
+        walkersFound++;
+
+        auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
+        auto &postSync = walkerCmd->getPostSync();
+
+        EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation()); // every walker found must have its post-sync write disabled
+
+        walkerItor = find<COMPUTE_WALKER *>(++walkerItor, cmdList.end());
+    }
+
+    EXPECT_GT(walkersFound, 1u); // the copy must have been emitted as more than one walker
+
+    alignedFree(alignedPtr);
+}
+
 HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEventsThenSignalSyncAllocation, IsAtLeastXeHpCore) {
     using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;