diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index b3b5e771cf..e95a2c7626 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1306,6 +1306,53 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) { + if (!hEvent) { + return; + } + if (isCopyOnly()) { + appendEventForProfilingCopyCommand(hEvent, beforeWalker); + } else { + auto event = Event::fromHandle(hEvent); + + if (!event->isTimestampEvent) { + return; + } + + commandContainer.addToResidencyContainer(&event->getAllocation()); + auto baseAddr = event->getGpuAddress(); + + if (beforeWalker) { + auto contextStartAddr = baseAddr; + auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart); + + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), + REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr); + + } else { + auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd); + auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd); + NEO::PipeControlArgs args; + args.dcFlushEnable = false; + + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), + REG_GLOBAL_TIMESTAMP_LDW, globalEndAddr); + NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), + GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddr); + + args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; + if (args.dcFlushEnable) { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + } + } + } +} + template ze_result_t CommandListCoreFamily::reserveSpace(size_t size, void **ptr) { auto availableSpace = commandContainer.getCommandStream()->getAvailableSpace(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index db1cc36400..8035f12ac1 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -75,49 +75,6 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z return ZE_RESULT_SUCCESS; } -template -void CommandListCoreFamily::appendEventForProfiling(ze_event_handle_t hEvent, bool beforeWalker) { - if (!hEvent) { - return; - } - if (isCopyOnly()) { - appendEventForProfilingCopyCommand(hEvent, beforeWalker); - } else { - auto event = Event::fromHandle(hEvent); - - if (!event->isTimestampEvent) { - return; - } - - commandContainer.addToResidencyContainer(&event->getAllocation()); - auto baseAddr = event->getGpuAddress(); - - if (beforeWalker) { - auto contextStartAddr = baseAddr; - auto globalStartAddr = baseAddr + offsetof(KernelTimestampEvent, globalStart); - - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalStartAddr); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddr); - - } else { - auto contextEndAddr = baseAddr + offsetof(KernelTimestampEvent, contextEnd); - auto globalEndAddr = baseAddr + offsetof(KernelTimestampEvent, globalEnd); - NEO::PipeControlArgs args; - args.dcFlushEnable = false; - - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); - - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalEndAddr); - NEO::EncodeStoreMMIO::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddr); - - args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; - if (args.dcFlushEnable) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); - } - } - } -} - template bool CommandListCoreFamily::useMemCopyToBlitFill(size_t patternSize) { return patternSize > sizeof(uint32_t); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp index 69147044cf..2aeeb0508a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist.cpp @@ -584,6 +584,7 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControl EXPECT_NE(cmdList.end(), itor); } + HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWithDcFlushThenMiFlushDWIsProgrammed) { using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; std::unique_ptr commandList(CommandList::create(productFamily, device, true)); @@ -618,6 +619,58 @@ HWTEST_F(CommandListCreate, givenCommandListyWhenAppendWaitEventsWithDcFlushTheP EXPECT_NE(cmdList.end(), itor); } +HWTEST2_F(CommandListCreate, givenCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, Platforms) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + MockAppendMemoryCopy commandList; + commandList.initialize(device, false); + void *srcPtr = reinterpret_cast(0x1234); + void *dstPtr = reinterpret_cast(0x2345); + + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + auto eventPool = std::unique_ptr(L0::EventPool::create(driverHandle.get(), 0, nullptr, &eventPoolDesc)); + auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); + + commandList.appendMemoryCopy(dstPtr, srcPtr, 0x100, event->toHandle(), 0, nullptr); + EXPECT_GT(commandList.appendMemoryCopyKernelWithGACalledTimes, 0u); + EXPECT_EQ(commandList.appendMemoryCopyBlitCalledTimes, 0u); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList.commandContainer.getCommandStream()->getCpuBase(), 0), + commandList.commandContainer.getCommandStream()->getUsed())); + auto itor = find(cmdList.begin(), cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); + itor++; + EXPECT_NE(cmdList.end(), itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); + + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW); + itor++; + EXPECT_NE(cmdList.end(), itor); + cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW); +} + template class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily> { public: