diff --git a/level_zero/api/core/ze_cmdlist.cpp b/level_zero/api/core/ze_cmdlist.cpp index 1980ba43a2..caf8002f6d 100644 --- a/level_zero/api/core/ze_cmdlist.cpp +++ b/level_zero/api/core/ze_cmdlist.cpp @@ -8,6 +8,8 @@ #include "level_zero/core/source/cmdlist/cmdlist.h" #include +#include "third_party/level_zero/ze_api_ext.h" + extern "C" { __zedllexport ze_result_t __zecall @@ -44,4 +46,14 @@ zeCommandListReset( return L0::CommandList::fromHandle(hCommandList)->reset(); } +ZE_APIEXPORT ze_result_t ZE_APICALL +zeCommandListAppendWriteGlobalTimestampExt( + ze_command_list_handle_t hCommandList, + uint64_t *dstptr, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + return L0::CommandList::fromHandle(hCommandList)->appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); +} + } // extern "C" diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 264d488e8c..dff937a0a5 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -98,6 +98,8 @@ struct CommandList : _ze_command_list_handle_t { virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0; + virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0; virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0; virtual ze_result_t reset() = 0; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index cb8e941c58..e4368a5fce 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -119,6 +119,8 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t 
appendSignalEvent(ze_event_handle_t hEvent) override;
     ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;
+    ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
+                                           uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
     ze_result_t reserveSpace(size_t size, void **ptr) override;
     ze_result_t reset() override;
     ze_result_t executeCommandListImmediate(bool performMigration) override;
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl
index a66a3cd0b2..5191e99cfc 100644
--- a/level_zero/core/source/cmdlist/cmdlist_hw.inl
+++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl
@@ -1399,6 +1399,63 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand
     }
 }
 
+// Programs a pipe control whose post-sync operation writes the timestamp to dstptr.
+// Waits on phWaitEvents are appended first; hSignalEvent, when non-null, is signaled
+// afterwards.
+//
+// Returns:
+//  - ZE_RESULT_ERROR_INVALID_NULL_POINTER when dstptr is null,
+//  - ZE_RESULT_ERROR_INVALID_ARGUMENT when numWaitEvents > 0 and phWaitEvents is null,
+//  - the first non-success result of appendWaitOnEvents / appendSignalEvent,
+//  - ZE_RESULT_SUCCESS otherwise.
+//
+// NOTE(review): template argument lists were stripped from this copy of the patch and
+// are restored here assuming the <gfxCoreFamily> parameter name - confirm against the repo.
+template <GFXCORE_FAMILY gfxCoreFamily>
+ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
+    uint64_t *dstptr, ze_event_handle_t hSignalEvent,
+    uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
+
+    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
+    using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;
+
+    // Reject bad arguments before anything is written to the command stream.
+    if (dstptr == nullptr) {
+        return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
+    }
+    if (numWaitEvents > 0 && phWaitEvents == nullptr) {
+        // NOTE(review): ze_api_ext.h documents ZE_RESULT_ERROR_INVALID_SIZE for this case;
+        // the existing error code is kept so current callers see no change.
+        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+
+    if (numWaitEvents > 0) {
+        // Qualified call bypasses virtual dispatch, so only this class's encoding runs.
+        const ze_result_t waitResult =
+            CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents);
+        if (waitResult != ZE_RESULT_SUCCESS) {
+            return waitResult;
+        }
+    }
+
+    NEO::PipeControlArgs args(false);
+
+    NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
+        *commandContainer.getCommandStream(),
+        POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
+        reinterpret_cast<uint64_t>(dstptr),
+        0,
+        commandContainer.getDevice()->getHardwareInfo(),
+        args);
+
+    if (hSignalEvent) {
+        // Surface a signal failure instead of reporting success unconditionally.
+        return CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
+    }
+
+    return ZE_RESULT_SUCCESS;
+}
+
 template <GFXCORE_FAMILY gfxCoreFamily>
 ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void **ptr) {
     auto availableSpace = commandContainer.getCommandStream()->getAvailableSpace();
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h
b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index bd92d7e495..ac373e3c65 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -65,6 +65,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendWaitOnEvents(ui return ret; } +template +ze_result_t CommandListCoreFamilyImmediate::appendWriteGlobalTimestamp( + uint64_t *dstptr, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + auto ret = CommandListCoreFamily::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents); + if (ret == ZE_RESULT_SUCCESS) { + executeCommandListImmediate(true); + } + return ret; +} + template ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory( ze_image_handle_t hDstImage, diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 45ca5dba2b..da95d71c54 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -150,6 +150,8 @@ struct Mock : public CommandList { size_t pattern_size, size_t size, ze_event_handle_t hEvent)); MOCK_METHOD1(appendSignalEvent, ze_result_t(ze_event_handle_t hEvent)); MOCK_METHOD2(appendWaitOnEvents, ze_result_t(uint32_t numEvents, ze_event_handle_t *phEvent)); + MOCK_METHOD4(appendWriteGlobalTimestamp, ze_result_t(uint64_t *dstptr, ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents)); MOCK_METHOD2(reserveSpace, ze_result_t(size_t size, void **ptr)); MOCK_METHOD0(reset, ze_result_t()); MOCK_METHOD0(resetParameters, ze_result_t()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 13a83a4933..9b1211f4a9 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -326,7 +326,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThe EXPECT_NE(cmdList.end(), itor); } -HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControlIsProgrammed) { +HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventThePipeControlIsProgrammed) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; std::unique_ptr commandList(CommandList::create(productFamily, device, false)); auto &commandContainer = commandList->commandContainer; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index 629c11ed51..176ae22b84 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -17,6 +17,7 @@ #include "level_zero/core/source/image/image_hw.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" +#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" @@ -162,6 +163,63 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u); } +HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenPipeControlWithTimestampWriteEncoded, Platforms) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + std::unique_ptr commandList(CommandList::create(productFamily, device, false)); + auto &commandContainer = commandList->commandContainer; + + uint64_t timestampAddress = 0x12345678555500; + uint32_t timestampAddressLow = (uint32_t)(timestampAddress & 
0xFFFFFFFF); + uint32_t timestampAddressHigh = (uint32_t)(timestampAddress >> 32); + uint64_t *dstptr = reinterpret_cast(timestampAddress); + + commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + EXPECT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getDcFlushEnable()); + EXPECT_EQ(cmd->getAddressHigh(), timestampAddressHigh); + EXPECT_EQ(cmd->getAddress(), timestampAddressLow); + postSyncFound = true; + } + } + EXPECT_TRUE(postSyncFound); +} + +HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendWriteGlobalTimestampReturnsSuccess, Platforms) { + Mock cmdQueue; + const ze_command_queue_desc_t desc = {}; + uint64_t timestampAddress = 0x12345678555500; + uint64_t *dstptr = reinterpret_cast(timestampAddress); + + auto commandList = std::make_unique>>(); + ASSERT_NE(nullptr, commandList); + bool ret = commandList->initialize(device, false); + ASSERT_TRUE(ret); + commandList->device = device; + commandList->cmdQImmediate = &cmdQueue; + commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + commandList->cmdQImmediateDesc = &desc; + + EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); + EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS)); + + auto result = commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + commandList->cmdQImmediate = nullptr; +} + using AppendMemoryCopy = 
CommandListCreate;
 
 template
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_api.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_api.cpp
index 5d83cbcd15..de60135345 100644
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_api.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_api.cpp
@@ -11,6 +11,8 @@
 #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
 #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
 
+#include "third_party/level_zero/ze_api_ext.h"
+
 namespace L0 {
 namespace ult {
 
@@ -72,6 +74,15 @@ TEST(zeCommandListAppendWaitOnEvent, whenCalledThenRedirectedToObject) {
     EXPECT_EQ(ZE_RESULT_SUCCESS, result);
 }
 
+TEST(zeCommandListAppendWriteGlobalTimestampExt, whenCalledThenRedirectedToObject) {
+    Mock<CommandList> commandList;
+
+    EXPECT_CALL(commandList, appendWriteGlobalTimestamp(nullptr, nullptr, 0, nullptr)).Times(1);
+
+    auto result = zeCommandListAppendWriteGlobalTimestampExt(commandList.toHandle(), nullptr, nullptr, 0, nullptr);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
+}
+
 TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
     Mock<CommandList> commandList;
     Mock<::L0::Kernel> kernel;
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp
index 070049d5a6..fbc146b226 100644
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp
@@ -141,5 +141,56 @@ HWTEST_F(CommandListAppendSignalEvent, givenEventWithScopeFlagDeviceWhenAppendin
     ASSERT_TRUE(postSyncFound);
 }
 
+// NOTE(review): the template argument was lost in this copy of the patch; IGFX_SKYLAKE is assumed - confirm.
+using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
+// Checks that a timestamp write with a signal event encodes a WRITE_TIMESTAMP pipe control
+// addressed at dstptr, followed by a WRITE_IMMEDIATE_DATA pipe control carrying Event::STATE_SIGNALED.
+HWTEST2_F(CommandListAppendSignalEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenPipeControlForTimestampAndSignalEncoded, Platforms) {
+    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
+    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
+    auto &commandContainer = commandList->commandContainer;
+
+    uint64_t timestampAddress = 0x12345678555500;
+    uint32_t timestampAddressLow = static_cast<uint32_t>(timestampAddress & 0xFFFFFFFF);
+    uint32_t timestampAddressHigh = static_cast<uint32_t>(timestampAddress >> 32);
+    uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);
+
+    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr));
+
+    GenCmdList cmdList;
+    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
+        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
+
+    // Every lookup uses ASSERT rather than EXPECT: a failed search yields cmdList.end(),
+    // which must not be dereferenced and would otherwise keep the loops below running.
+    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
+    ASSERT_NE(cmdList.end(), itorPC);
+    auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
+    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
+        ++itorPC;
+        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
+        ASSERT_NE(cmdList.end(), itorPC);
+        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
+    }
+    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
+    EXPECT_FALSE(cmd->getDcFlushEnable());
+    EXPECT_EQ(cmd->getAddressHigh(), timestampAddressHigh);
+    EXPECT_EQ(cmd->getAddress(), timestampAddressLow);
+
+    ++itorPC;
+    itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
+    ASSERT_NE(cmdList.end(), itorPC);
+    cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
+    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
+        ++itorPC;
+        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
+        ASSERT_NE(cmdList.end(), itorPC);
+        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
+    }
+    EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
+    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
+    EXPECT_FALSE(cmd->getDcFlushEnable());
+}
+
 } // namespace ult
 } // namespace L0
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp
index
7ee5513d3c..4e175feeb7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -130,5 +130,56 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe } } +using Platforms = IsAtLeastProduct; +HWTEST2_F(CommandListAppendWaitOnEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithWaitOnEventsThenSemaphoreWaitAndPipeControlForTimestampEncoded, Platforms) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + + uint64_t timestampAddress = 0x12345678555500; + uint32_t timestampAddressLow = (uint32_t)(timestampAddress & 0xFFFFFFFF); + uint32_t timestampAddressHigh = (uint32_t)(timestampAddress >> 32); + uint64_t *dstptr = reinterpret_cast(timestampAddress); + ze_event_handle_t hEventHandle = event->toHandle(); + + commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 1, &hEventHandle); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), + commandList->commandContainer.getCommandStream()->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + + auto cmd = genCmdCast(*itor); + EXPECT_EQ(cmd->getCompareOperation(), + MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); + EXPECT_EQ(static_cast(-1), cmd->getSemaphoreDataDword()); + + auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace; + + EXPECT_EQ(cmd->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress() & addressSpace); + EXPECT_EQ(cmd->getWaitMode(), + MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE); + + itor++; + + auto itorPC = findAll(itor, cmdList.end()); + ASSERT_NE(0u, 
itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmdPC = genCmdCast(*it); + if (cmdPC->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { + EXPECT_TRUE(cmdPC->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmdPC->getDcFlushEnable()); + EXPECT_EQ(cmdPC->getAddressHigh(), timestampAddressHigh); + EXPECT_EQ(cmdPC->getAddress(), timestampAddressLow); + postSyncFound = true; + } + } + ASSERT_TRUE(postSyncFound); +} + } // namespace ult } // namespace L0 \ No newline at end of file diff --git a/third_party/level_zero/ze_api_ext.h b/third_party/level_zero/ze_api_ext.h index 79e4a89103..6737b1ec55 100644 --- a/third_party/level_zero/ze_api_ext.h +++ b/third_party/level_zero/ze_api_ext.h @@ -241,6 +241,48 @@ zeDeviceGetCommandQueueGroupProperties( ///< command queue group properties ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Appends a memory write of the device's global timestamp value into a +/// command list. +/// +/// @details +/// - The application must ensure the events are accessible by the device on +/// which the command list was created. +/// - The timestamp frequency can be queried from +/// ::ze_device_properties_t.timerResolution. +/// - The number of valid bits in the timestamp value can be queried from +/// ::ze_device_properties_t.timestampValidBits. +/// - The application must ensure the memory pointed to by dstptr is +/// accessible by the device on which the command list was created. +/// - The application must ensure the command list and events were created, +/// and the memory was allocated, on the same context. +/// - The application must **not** call this function from simultaneous +/// threads with the same command list handle. +/// - The implementation of this function should be lock-free. 
+/// +/// @returns +/// - ::ZE_RESULT_SUCCESS +/// - ::ZE_RESULT_ERROR_UNINITIALIZED +/// - ::ZE_RESULT_ERROR_DEVICE_LOST +/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `nullptr == hCommandList` +/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER +/// + `nullptr == dstptr` +/// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT +/// - ::ZE_RESULT_ERROR_INVALID_SIZE +/// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)` +ZE_APIEXPORT ze_result_t ZE_APICALL +zeCommandListAppendWriteGlobalTimestampExt( + ze_command_list_handle_t hCommandList, ///< [in] handle of the command list + uint64_t *dstptr, ///< [in,out] pointer to memory where timestamp value will be written; must + ///< be 8byte-aligned. + ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion + uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing query; + ///< must be 0 if `nullptr == phWaitEvents` + ze_event_handle_t *phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handle of the events to wait + ///< on before executing query +); + } //extern C #endif // _ZE_API_EXT_H