diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 752242174f..446994cec8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -297,6 +297,7 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand uint64_t baseAddr = event->getGpuAddress(this->device); uint32_t packetsToReset = event->getPacketsInUse(); + bool appendPipeControlWithPostSync = false; NEO::Device *neoDevice = device->getNEODevice(); uint32_t callId = 0; @@ -323,17 +324,31 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand NEO::MiFlushArgs args; args.commandWithPostSync = true; NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), - event->getGpuAddress(this->device), + baseAddr, Event::STATE_CLEARED, args, hwInfo); } else { - NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); - size_t estimateSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset; - if (this->partitionCount > 1) { - estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo); + bool applyScope = event->signalScope; + uint32_t packetsToResetUsingSdi = packetsToReset; + if (applyScope || event->isEventTimestampFlagSet()) { + UNRECOVERABLE_IF(packetsToReset == 0); + packetsToResetUsingSdi = packetsToReset - 1; + appendPipeControlWithPostSync = true; } - for (uint32_t i = 0u; i < packetsToReset; i++) { + for (uint32_t i = 0u; i < packetsToResetUsingSdi; i++) { + NEO::EncodeStoreMemory::programStoreDataImm( + *commandContainer.getCommandStream(), + baseAddr, + Event::STATE_CLEARED, + 0u, + false, + false); + baseAddr += event->getSinglePacketSize(); + } + + if (appendPipeControlWithPostSync) { + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(event->signalScope, hwInfo); NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, @@ -341,8 +356,8 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand Event::STATE_CLEARED, hwInfo, args); - baseAddr += event->getSinglePacketSize(); } + if (this->partitionCount > 1) { appendMultiTileBarrier(*neoDevice); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 0e56ccc5b8..b3fcb14813 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -21,9 +21,10 @@ namespace ult { using CommandListAppendEventReset = Test; -HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPostSyncWriteIsGenerated) { +HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenStoreDataImmIsGenerated) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); @@ -33,6 +34,50 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPost auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + usedSpaceAfter)); + + auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + auto itorSdi = findAll(cmdList.begin(), cmdList.end()); + uint32_t sdiFound = 0; + ASSERT_NE(0u, itorSdi.size()); + for (auto it : itorSdi) { + auto cmd = genCmdCast(*it); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + gpuAddress += event->getSinglePacketSize(); + sdiFound++; + } + EXPECT_NE(0u, sdiFound); +} + +HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventWithTimeStampIsAppendedThenStoreDataImmAndPostSyncWriteIsGenerated) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + ze_result_t result = ZE_RESULT_SUCCESS; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result)); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + result = commandList->appendEventReset(event->toHandle()); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), @@ -45,6 +90,17 @@ HWTEST_F(CommandListAppendEventReset, givenCmdlistWhenResetEventAppendedThenPost gpuAddress += event->getContextEndOffset(); } + auto itorSdi = findAll(cmdList.begin(), cmdList.end()); + uint32_t sdiFound = 0; + ASSERT_NE(0u, itorSdi.size()); + for (auto it : itorSdi) { + auto cmd = genCmdCast(*it); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + gpuAddress += event->getSinglePacketSize(); + sdiFound++; + } + EXPECT_EQ(EventPacketsCount::eventPackets - 1, sdiFound); + uint32_t postSyncFound = 0; for (auto it : itorPC) { auto cmd = genCmdCast(*it); @@ -98,11 +154,16 @@ HWTEST_F(CommandListAppendEventReset, givenCopyOnlyCmdlistWhenResetEventAppended auto itorPC = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, itorPC.size()); bool postSyncFound = false; + + auto gpuAddress = event->getGpuAddress(device); + if (event->isUsingContextEndOffset()) { + gpuAddress += event->getContextEndOffset(); + } + for (auto it : itorPC) { auto cmd = genCmdCast(*it); if (cmd->getPostSyncOperation() == MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_INITIAL); - auto gpuAddress = event->getGpuAddress(device); EXPECT_EQ(cmd->getDestinationAddress(), gpuAddress); postSyncFound = true; } @@ -164,6 +225,7 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon auto contextOffset = event->getContextEndOffset(); auto baseAddr = event->getGpuAddress(device); auto gpuAddress = ptrOffset(baseAddr, contextOffset); + gpuAddress += ((EventPacketsCount::eventPackets - 1) * event->getSinglePacketSize()); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -179,11 +241,11 @@ HWTEST2_F(CommandListAppendEventReset, givenTimestampEventUsedInResetThenPipeCon EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_FALSE(cmd->getDcFlushEnable()); - postSyncFound++; gpuAddress += event->getSinglePacketSize(); + postSyncFound++; } } - ASSERT_EQ(EventPacketsCount::eventPackets, postSyncFound); + ASSERT_EQ(1u, postSyncFound); } HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPipeControlWithDcFlushAppended, IsAtLeastSkl) { @@ -231,13 +293,14 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip } HWTEST2_F(CommandListAppendEventReset, - givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectSameNumberOfResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) { + givenMultiTileCommandListWhenAppendingMultiPacketEventThenExpectCorrectNumberOfStoreDataImmAndResetPostSyncAndMultiBarrierCommands, IsAtLeastXeHpCore) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; using MI_ATOMIC = typename FamilyType::MI_ATOMIC; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily>(); ASSERT_NE(nullptr, commandList); @@ -264,7 +327,8 @@ HWTEST2_F(CommandListAppendEventReset, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packets + + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) + + ((packets - 1) * sizeof(MI_STORE_DATA_IMM)) + commandList->estimateBufferSizeMultiTileBarrier(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -275,6 +339,11 @@ HWTEST2_F(CommandListAppendEventReset, cmdStream->getCpuBase(), usedSize)); + auto itorSdi = find(cmdList.begin(), cmdList.end()); + auto cmd = genCmdCast(*itorSdi); + EXPECT_EQ(gpuAddress, cmd->getAddress()); + gpuAddress += event->getSinglePacketSize(); + auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); ASSERT_NE(0u, pipeControlList.size()); uint32_t postSyncFound = 0; @@ -291,7 +360,7 @@ HWTEST2_F(CommandListAppendEventReset, postSyncPipeControlItor = it; } } - EXPECT_EQ(packets, postSyncFound); + EXPECT_EQ(1u, postSyncFound); postSyncPipeControlItor++; ASSERT_NE(cmdList.end(), postSyncPipeControlItor);