Debug flag to force ComputeWalker->PostSync flushing bits

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-11-14 18:21:44 +00:00
committed by Compute-Runtime-Automation
parent 3281dea1fe
commit 62db166cee
5 changed files with 59 additions and 1 deletions

View File

@ -279,6 +279,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, hwInfo);
}
if (DebugManager.flags.ForceComputeWalkerPostSyncFlush.get() == 1) {
postSync.setDataportPipelineFlush(true);
EncodeDispatchKernel<Family>::adjustTimestampPacket(walkerCmd, hwInfo);
}
walkerCmd.setPredicateEnable(args.isPredicate);
auto threadGroupCount = walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension();

View File

@ -223,6 +223,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, CopyHostPtrOnCpu, -1, "-1: default, 0: disable,
DECLARE_DEBUG_VARIABLE(int32_t, ForceZeDeviceCanAccessPerReturnValue, -1, "-1: default, 0: zeDeviceCanAccessPeer always return false 1: zeDeviceCanAccessPeer always return true")
DECLARE_DEBUG_VARIABLE(int32_t, AdjustThreadGroupDispatchSize, -1, "-1: default, 0: do not adjust thread group dispatch size 1: adjust thread group dispatch size (PVC)")
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonblockingExecbufferCalls, -1, "-1: default, 0: make execbuffer call blocking, 1: make execbuffer call nonblocking. Supported only in prelim i915 kernels.")
DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: default, 0: disable 1: Enable all flushing bits in ComputeWalker->PostSync")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@ -486,4 +486,5 @@ ForceZeDeviceCanAccessPerReturnValue = -1
AdjustThreadGroupDispatchSize = -1
ForceNonblockingExecbufferCalls = -1
UseHighAlignmentForHeapExtended = -1
ForceAutoGrfCompilationMode = -1
ForceAutoGrfCompilationMode = -1
ForceComputeWalkerPostSyncFlush = -1

View File

@ -58,6 +58,28 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa
ASSERT_NE(itorPC, commands.end());
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits) {
DebugManagerStateRestore restore;
DebugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
EXPECT_TRUE(walkerCmd->getPostSync().getDataportPipelineFlush());
}
using CommandEncodeStatesUncachedMocsTests = Test<CommandEncodeStatesFixture>;
HWTEST_F(CommandEncodeStatesUncachedMocsTests, whenEncodingDispatchKernelWithUncachedMocsAndDirtyHeapsThenCorrectMocsIsSet) {

View File

@ -6,7 +6,11 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
#include "shared/test/unit_test/mocks/mock_dispatch_kernel_encoder_interface.h"
using namespace NEO;
@ -196,4 +200,29 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalRegRe
validateBaseProgramming<FamilyType>(buffer, compareOperation, startAddress, indirect, compareReg1, compareReg2);
}
}
}
using CommandEncodeStatesXeHpcAndLaterTests = Test<CommandEncodeStatesFixture>;
HWTEST2_F(CommandEncodeStatesXeHpcAndLaterTests, givenDebugFlagSetWhenProgrammingWalkerThenSetFlushingBits, IsAtLeastXeHpcCore) {
DebugManagerStateRestore restore;
DebugManager.flags.ForceComputeWalkerPostSyncFlush.set(1);
uint32_t dims[] = {2, 1, 1};
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
ASSERT_NE(itor, commands.end());
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
EXPECT_TRUE(walkerCmd->getPostSync().getDataportPipelineFlush());
EXPECT_TRUE(walkerCmd->getPostSync().getDataportSubsliceCacheFlush());
}