feature: debug flag to disable walker split during copy operations

Related-To: NEO-12607

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-09-11 15:08:44 +00:00
committed by Compute-Runtime-Automation
parent 8d3ddbac31
commit 487b02a2ac
4 changed files with 34 additions and 0 deletions

View File

@@ -1514,6 +1514,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
kernelCounter += rightSize > 0 ? 1 : 0;
}
if (NEO::debugManager.flags.ForceNonWalkerSplitMemoryCopy.get() == 1) {
leftSize = size;
middleSizeBytes = 0;
rightSize = 0;
kernelCounter = 1;
}
bool waitForImplicitInOrderDependency = !isCopyOnlyEnabled || inOrderCopyOnlySignalingAllowed;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency, false);

View File

@@ -4659,6 +4659,31 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithout
alignedFree(alignedPtr);
}
// Verifies that with ForceNonWalkerSplitMemoryCopy set to 1 a memory copy on an
// unaligned pointer is emitted as exactly one walker command.
HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenKernelSplitIsExpectedThenDontSplit, IsAtLeastXeHpCore) {
    debugManager.flags.ForceNonWalkerSplitMemoryCopy.set(1);

    auto immCmdList = createImmCmdList<gfxCoreFamily>();
    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // The buffer is cache-line aligned and then shifted by one byte, so both the
    // pointer and the copied size are unaligned (per the test name, this is the
    // case where a split would otherwise be expected).
    const size_t ptrBaseSize = 128;
    const size_t offset = 1;
    auto alignedPtr = alignedMalloc(ptrBaseSize, MemoryConstants::cacheLineSize);
    auto unalignedPtr = ptrOffset(alignedPtr, offset);

    immCmdList->appendMemoryCopy(unalignedPtr, unalignedPtr, ptrBaseSize - offset, nullptr, 0, nullptr, false, false);

    GenCmdList cmdList;
    const bool parsed = FamilyType::Parse::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed());

    // Neither the parser nor the walker lookup below takes the host buffer, so it
    // is released here: a fatal ASSERT_TRUE returns early and would otherwise
    // skip the cleanup and leak the allocation.
    alignedFree(alignedPtr);

    ASSERT_TRUE(parsed);

    auto walkers = NEO::UnitTestHelper<FamilyType>::findAllWalkerTypeCmds(cmdList.begin(), cmdList.end());
    EXPECT_EQ(1u, walkers.size());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitWithEventThenSignalCounter, IsAtLeastXeHpCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

View File

@@ -436,6 +436,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit monitor fence dispatch")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking")
// NOTE(review): the consumer in CommandListCoreFamily::appendMemoryCopy only tests this flag for == 1,
// so 0 and -1 appear to behave identically (regular left/middle/right split) - confirm this is intended.
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonWalkerSplitMemoryCopy, -1, "-1: default, 0: disabled, 1: enabled. If set, memory copy will be executed as single byte copy Walker without performance optimizations")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
DECLARE_DEBUG_VARIABLE(int32_t, WaitForPagingFenceInController, -1, "Instead of waiting for paging fence on user thread, program additional semaphore which will be signaled by direct submission controller when paging fence reaches required value -1: default, 0 - disable, 1 - enable.")

View File

@@ -625,4 +625,5 @@ WaitForPagingFenceInController = -1
DirectSubmissionPrintSemaphoreUsage = -1
ForceNonCoherentModeForTimestamps = 0
ExperimentalUSMAllocationReuseVersion = -1
ForceNonWalkerSplitMemoryCopy = -1
# Please don't edit below this line