fix: Call RelaxedOrdering regs init before in-order dependencies

Related-To: LOCI-4332

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-04-28 16:03:32 +00:00
committed by Compute-Runtime-Automation
parent 7b0283e810
commit 1dcab07300
2 changed files with 33 additions and 4 deletions

View File

@ -2024,6 +2024,10 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
if (relaxedOrderingAllowed && (numWaitEvents > 0 || latestSentInOrderEvent)) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
if (latestSentInOrderEvent) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &latestSentInOrderEvent, relaxedOrderingAllowed, trackDependencies);
}
@ -2107,10 +2111,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
}
}
if (relaxedOrderingAllowed) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
for (uint32_t i = 0; i < numEvents; i++) {
auto event = Event::fromHandle(phEvent[i]);
commandContainer.addToResidencyContainer(&event->getAllocation(this->device));

View File

@ -824,6 +824,35 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
ASSERT_NE(cmdList.end(), itor);
}
// Verifies that addEventsToCmdList(), when called with relaxed ordering allowed
// and no explicit wait events, starts its output with two MI_LOAD_REGISTER_REG
// commands copying CS_GPR_R4 -> CS_GPR_R0 (low dword, then +4).
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastSkl) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    // Held in a local for the duration of the test; the events themselves are
    // accessed through the fixture's `events` container.
    auto eventPool = createEvents(1);

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    // NOTE(review): this launch presumably records an in-order event on the
    // command list, which is what makes the call below emit anything despite
    // numWaitEvents == 0 -- confirm against the in-order implementation.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);

    // Only commands written after this point belong to addEventsToCmdList().
    auto offset = cmdStream->getUsed();

    immCmdList->addEventsToCmdList(0, nullptr, true, true);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), offset),
        cmdStream->getUsed() - offset));

    // Guard against an empty parse result: dereferencing begin() of an empty
    // list would be undefined behaviour rather than a clean test failure.
    ASSERT_NE(cmdList.end(), cmdList.begin());

    auto lrrCmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_REG *>(*cmdList.begin());
    ASSERT_NE(nullptr, lrrCmd);

    EXPECT_EQ(CS_GPR_R4, lrrCmd->getSourceRegisterAddress());
    EXPECT_EQ(CS_GPR_R0, lrrCmd->getDestinationRegisterAddress());

    // The second MI_LOAD_REGISTER_REG is expected to follow the first one
    // directly in the command stream, so advance the typed pointer by one command.
    lrrCmd++;

    EXPECT_EQ(CS_GPR_R4 + 4, lrrCmd->getSourceRegisterAddress());
    EXPECT_EQ(CS_GPR_R0 + 4, lrrCmd->getDestinationRegisterAddress());
}
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
template <typename FamilyType>
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {