From 487b02a2ac65e03a7246a9c21453b7dca5fcaa1d Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Wed, 11 Sep 2024 15:08:44 +0000 Subject: [PATCH] feature: debug flag to disable walker split during copy operations Related-To: NEO-12607 Signed-off-by: Bartosz Dunajski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 7 ++++++ .../sources/cmdlist/test_in_order_cmdlist.cpp | 25 +++++++++++++++++++ .../debug_settings/debug_variables_base.inl | 1 + shared/test/common/test_files/igdrcl.config | 1 + 4 files changed, 34 insertions(+) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index c30bf80581..321d3efa22 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1514,6 +1514,13 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, kernelCounter += rightSize > 0 ? 1 : 0; } + if (NEO::debugManager.flags.ForceNonWalkerSplitMemoryCopy.get() == 1) { + leftSize = size; + middleSizeBytes = 0; + rightSize = 0; + kernelCounter = 1; + } + bool waitForImplicitInOrderDependency = !isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed; ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 883aab6e4c..e9b877b526 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -4659,6 +4659,31 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout alignedFree(alignedPtr); } +HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenKernelSplitIsExpectedThenDontSplit, IsAtLeastXeHpCore) { + debugManager.flags.ForceNonWalkerSplitMemoryCopy.set(1); + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + auto immCmdList = createImmCmdList(); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + const size_t ptrBaseSize = 128; + const size_t offset = 1; + auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize); + auto unalignedPtr = ptrOffset(alignedPtr, offset); + + immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 0, nullptr, false, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto walkers = NEO::UnitTestHelper::findAllWalkerTypeCmds(cmdList.begin(), cmdList.end()); + EXPECT_EQ(1u, walkers.size()); + + alignedFree(alignedPtr); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEventThenSignalCounter, IsAtLeastXeHpCore) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 85caa406fa..2474fe0181 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -436,6 +436,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, - DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit motor fence dispatch") DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking") +DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations") DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers") DECLARE_DEBUG_VARIABLE(int32_t, WaitForPagingFenceInController, -1, "Instead of waiting for paging fence on user thread, program additional semaphore which will be signaled by direct submission controller when paging fence reaches required value -1: default, 0 - disable, 1 - enable.") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index f12fbaa96f..437676f716 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -625,4 +625,5 @@ WaitForPagingFenceInController = -1 DirectSubmissionPrintSemaphoreUsage = -1 ForceNonCoherentModeForTimestamps = 0 ExperimentalUSMAllocationReuseVersion = -1 +ForceNonWalkerSplitMemoryCopy = -1 # Please don't edit below this line