Add support for global timestamp write API

Change-Id: I709783839a60478a62415c4d87a6b3a01ad43636
Signed-off-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@intel.com>
This commit is contained in:
Aravind Gopalakrishnan
2020-07-16 12:17:29 -07:00
committed by sys_ocldev
parent 6e20dfafab
commit 4e16e574e9
13 changed files with 274 additions and 1 deletions

View File

@ -8,6 +8,8 @@
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include <level_zero/ze_api.h>
#include "third_party/level_zero/ze_api_ext.h"
extern "C" {
__zedllexport ze_result_t __zecall
@ -44,4 +46,14 @@ zeCommandListReset(
return L0::CommandList::fromHandle(hCommandList)->reset();
}
// C API entry point: resolves the command list handle to its L0::CommandList
// object and forwards the timestamp-write request to it unchanged.
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendWriteGlobalTimestampExt(
    ze_command_list_handle_t hCommandList,
    uint64_t *dstptr,
    ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents,
    ze_event_handle_t *phWaitEvents) {
    auto *targetList = L0::CommandList::fromHandle(hCommandList);
    return targetList->appendWriteGlobalTimestamp(dstptr,
                                                  hSignalEvent,
                                                  numWaitEvents,
                                                  phWaitEvents);
}
} // extern "C"

View File

@ -98,6 +98,8 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) = 0;
// Appends a write of the device's global timestamp value to the command list.
//   dstptr        - memory location that receives the timestamp
//   hSignalEvent  - optional event to signal on completion (may be null)
//   numWaitEvents - number of entries in phWaitEvents
//   phWaitEvents  - optional events to wait on before the write
virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t reserveSpace(size_t size, void **ptr) = 0;
virtual ze_result_t reset() = 0;

View File

@ -119,6 +119,8 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override;
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;
// Encodes the optional event waits, a PIPE_CONTROL with a timestamp-write
// post-sync operation targeting dstptr, and the optional event signal.
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t reserveSpace(size_t size, void **ptr) override;
ze_result_t reset() override;
ze_result_t executeCommandListImmediate(bool performMigration) override;

View File

@ -1399,6 +1399,39 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfiling(ze_event_hand
}
}
// Appends a write of the device's global timestamp to dstptr.
//
// Encoded sequence:
//   1. optional waits on phWaitEvents (when numWaitEvents > 0),
//   2. a PIPE_CONTROL whose post-sync operation writes the timestamp to dstptr,
//   3. an optional signal of hSignalEvent.
//
// Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when numWaitEvents > 0 but
// phWaitEvents is null, the error reported by appendWaitOnEvents or
// appendSignalEvent if either fails, and ZE_RESULT_SUCCESS otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
    uint64_t *dstptr, ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {

    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION;

    if (numWaitEvents > 0) {
        if (phWaitEvents == nullptr) {
            return ZE_RESULT_ERROR_INVALID_ARGUMENT;
        }
        // Do not encode the timestamp write if the waits could not be appended;
        // otherwise the write would not be ordered after the requested events.
        const ze_result_t waitResult =
            CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numWaitEvents, phWaitEvents);
        if (waitResult != ZE_RESULT_SUCCESS) {
            return waitResult;
        }
    }

    // NOTE(review): the 'false' argument presumably disables DC flush (the unit
    // tests expect getDcFlushEnable() to be false) - confirm against PipeControlArgs.
    NEO::PipeControlArgs args(false);
    NEO::MemorySynchronizationCommands<GfxFamily>::addPipeControlAndProgramPostSyncOperation(
        *commandContainer.getCommandStream(),
        POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP,
        reinterpret_cast<uint64_t>(dstptr),
        0, // presumably the immediate-data operand, unused for a timestamp write - TODO confirm
        commandContainer.getDevice()->getHardwareInfo(),
        args);

    if (hSignalEvent) {
        // Surface a failure to append the signal instead of reporting success.
        return CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
    }

    return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void **ptr) {
auto availableSpace = commandContainer.getCommandStream()->getAvailableSpace();

View File

@ -65,6 +65,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) override;
// Immediate variant: appends through the CommandListCoreFamily implementation
// and, when that succeeds, executes the command list right away.
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage,
const void *srcPtr,
const ze_image_region_t *pDstRegion,

View File

@ -131,6 +131,17 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
return ret;
}
// Immediate command list flavour of appendWriteGlobalTimestamp: the commands
// are encoded by the core-family implementation and, if encoding succeeded,
// submitted straight away. The encoding status is what the caller receives.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTimestamp(
    uint64_t *dstptr, ze_event_handle_t hSignalEvent,
    uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
    const auto appendStatus = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
        dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
    if (appendStatus != ZE_RESULT_SUCCESS) {
        return appendStatus;
    }

    executeCommandListImmediate(true);
    return appendStatus;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMemory(
ze_image_handle_t hDstImage,

View File

@ -150,6 +150,8 @@ struct Mock<CommandList> : public CommandList {
size_t pattern_size, size_t size, ze_event_handle_t hEvent));
MOCK_METHOD1(appendSignalEvent, ze_result_t(ze_event_handle_t hEvent));
MOCK_METHOD2(appendWaitOnEvents, ze_result_t(uint32_t numEvents, ze_event_handle_t *phEvent));
// gMock stub for CommandList::appendWriteGlobalTimestamp (four arguments).
MOCK_METHOD4(appendWriteGlobalTimestamp, ze_result_t(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents));
MOCK_METHOD2(reserveSpace, ze_result_t(size_t size, void **ptr));
MOCK_METHOD0(reset, ze_result_t());
MOCK_METHOD0(resetParameters, ze_result_t());

View File

@ -326,7 +326,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThe
EXPECT_NE(cmdList.end(), itor);
}
HWTEST_F(CommandListCreate, givenCommandListyWhenAppendSignalEventThePipeControlIsProgrammed) {
HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventThePipeControlIsProgrammed) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
auto &commandContainer = commandList->commandContainer;

View File

@ -17,6 +17,7 @@
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
@ -162,6 +163,63 @@ HWTEST2_F(CommandListCreate, givenCommandListAnd3DWhbufferenMemoryCopyRegionCall
EXPECT_GT(cmdList.appendMemoryCopyKernel3dCalledTimes, 0u);
}
// With no wait or signal events, appendWriteGlobalTimestamp must emit a
// PIPE_CONTROL whose post-sync operation writes the timestamp to the requested
// address, with CS stall enabled and DC flush disabled.
HWTEST2_F(CommandListCreate, givenCommandListWhenAppendWriteGlobalTimestampCalledThenPipeControlWithTimestampWriteEncoded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    // Arbitrary 64-bit destination, split into the two halves the command reports.
    const uint64_t dstGpuVa = 0x12345678555500;
    const uint32_t expectedAddressLow = static_cast<uint32_t>(dstGpuVa & 0xFFFFFFFF);
    const uint32_t expectedAddressHigh = static_cast<uint32_t>(dstGpuVa >> 32);
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(dstGpuVa);

    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, false));
    auto &commandContainer = commandList->commandContainer;

    commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0),
        commandContainer.getCommandStream()->getUsed()));

    auto pipeControls = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    EXPECT_NE(0u, pipeControls.size());

    bool timestampWriteSeen = false;
    for (auto pcIt : pipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pcIt);
        if (pipeControl->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
            continue;
        }
        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
        EXPECT_FALSE(pipeControl->getDcFlushEnable());
        EXPECT_EQ(pipeControl->getAddressHigh(), expectedAddressHigh);
        EXPECT_EQ(pipeControl->getAddress(), expectedAddressLow);
        timestampWriteSeen = true;
    }
    EXPECT_TRUE(timestampWriteSeen);
}
// An immediate command list must execute and synchronize on its queue exactly
// once per appendWriteGlobalTimestamp call and report success.
HWTEST2_F(CommandListCreate, givenImmediateCommandListWhenAppendWriteGlobalTimestampReturnsSuccess, Platforms) {
    Mock<CommandQueue> cmdQueue;
    const ze_command_queue_desc_t desc = {};

    auto commandList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
    ASSERT_NE(nullptr, commandList);
    const bool initialized = commandList->initialize(device, false);
    ASSERT_TRUE(initialized);

    // Turn the list into an immediate one that is backed by the mocked queue.
    commandList->device = device;
    commandList->cmdQImmediate = &cmdQueue;
    commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
    commandList->cmdQImmediateDesc = &desc;

    EXPECT_CALL(cmdQueue, executeCommandLists).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));
    EXPECT_CALL(cmdQueue, synchronize).Times(1).WillRepeatedly(::testing::Return(ZE_RESULT_SUCCESS));

    const uint64_t dstGpuVa = 0x12345678555500;
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(dstGpuVa);
    auto status = commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 0, nullptr);
    ASSERT_EQ(ZE_RESULT_SUCCESS, status);

    // Detach the stack-allocated mock queue before the command list goes away.
    commandList->cmdQImmediate = nullptr;
}
using AppendMemoryCopy = CommandListCreate;
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -11,6 +11,8 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "third_party/level_zero/ze_api_ext.h"
namespace L0 {
namespace ult {
@ -72,6 +74,15 @@ TEST(zeCommandListAppendWaitOnEvent, whenCalledThenRedirectedToObject) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
// The C entry point must hand its arguments, unchanged, to
// CommandList::appendWriteGlobalTimestamp exactly once.
TEST(zeCommandListAppendWriteGlobalTimestampExt, whenCalledThenRedirectedToObject) {
    Mock<CommandList> commandList;
    EXPECT_CALL(commandList, appendWriteGlobalTimestamp(nullptr, nullptr, 0, nullptr)).Times(1);

    auto hCommandList = commandList.toHandle();
    const auto status = zeCommandListAppendWriteGlobalTimestampExt(hCommandList, nullptr, nullptr, 0, nullptr);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
TEST(zeCommandListAppendLaunchKernel, whenCalledThenRedirectedToObject) {
Mock<CommandList> commandList;
Mock<::L0::Kernel> kernel;

View File

@ -141,5 +141,51 @@ HWTEST_F(CommandListAppendSignalEvent, givenEventWithScopeFlagDeviceWhenAppendin
ASSERT_TRUE(postSyncFound);
}
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
// With a signal event, appendWriteGlobalTimestamp must emit a PIPE_CONTROL that
// writes the timestamp to the requested address, followed by a PIPE_CONTROL
// that writes Event::STATE_SIGNALED as immediate data.
HWTEST2_F(CommandListAppendSignalEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithSignalEventThenPipeControlForTimestampAndSignalEncoded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    auto &commandContainer = commandList->commandContainer;

    uint64_t timestampAddress = 0x12345678555500;
    uint32_t timestampAddressLow = static_cast<uint32_t>(timestampAddress & 0xFFFFFFFF);
    uint32_t timestampAddressHigh = static_cast<uint32_t>(timestampAddress >> 32);
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(timestampAddress);

    commandList->appendWriteGlobalTimestamp(dstptr, event->toHandle(), 0, nullptr);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

    // Every iterator check below is fatal (ASSERT_NE): each one guards a
    // dereference, and a non-fatal check would let the test dereference end()
    // or keep advancing past the end of the list when a command is missing.

    // Locate the PIPE_CONTROL that performs the timestamp write.
    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorPC);
    auto cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
        itorPC++;
        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
        ASSERT_NE(cmdList.end(), itorPC);
        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    }
    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
    EXPECT_FALSE(cmd->getDcFlushEnable());
    EXPECT_EQ(cmd->getAddressHigh(), timestampAddressHigh);
    EXPECT_EQ(cmd->getAddress(), timestampAddressLow);

    // Locate the following PIPE_CONTROL that signals the event.
    itorPC++;
    itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
    ASSERT_NE(cmdList.end(), itorPC);
    cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    while (cmd->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
        itorPC++;
        itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
        ASSERT_NE(cmdList.end(), itorPC);
        cmd = genCmdCast<PIPE_CONTROL *>(*itorPC);
    }
    EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED);
    EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
    EXPECT_FALSE(cmd->getDcFlushEnable());
}
} // namespace ult
} // namespace L0

View File

@ -130,5 +130,56 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe
}
}
using Platforms = IsAtLeastProduct<IGFX_SKYLAKE>;
// With one wait event, appendWriteGlobalTimestamp must emit an
// MI_SEMAPHORE_WAIT on that event and, after it, a PIPE_CONTROL that writes
// the timestamp to the requested address.
HWTEST2_F(CommandListAppendWaitOnEvent, givenCommandListWhenAppendWriteGlobalTimestampCalledWithWaitOnEventsThenSemaphoreWaitAndPipeControlForTimestampEncoded, Platforms) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    const uint64_t dstGpuVa = 0x12345678555500;
    const uint32_t expectedAddressLow = static_cast<uint32_t>(dstGpuVa & 0xFFFFFFFF);
    const uint32_t expectedAddressHigh = static_cast<uint32_t>(dstGpuVa >> 32);
    uint64_t *dstptr = reinterpret_cast<uint64_t *>(dstGpuVa);

    ze_event_handle_t hEventHandle = event->toHandle();
    commandList->appendWriteGlobalTimestamp(dstptr, nullptr, 1, &hEventHandle);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    // The semaphore wait on the event comes first.
    auto semaphoreIt = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), semaphoreIt);
    auto semaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreIt);

    EXPECT_EQ(semaphore->getCompareOperation(),
              MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
    EXPECT_EQ(static_cast<uint32_t>(-1), semaphore->getSemaphoreDataDword());

    auto addressSpace = device->getHwInfo().capabilityTable.gpuAddressSpace;
    EXPECT_EQ(semaphore->getSemaphoreGraphicsAddress() & addressSpace, event->getGpuAddress() & addressSpace);
    EXPECT_EQ(semaphore->getWaitMode(),
              MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE);

    // Only PIPE_CONTROLs located after the semaphore are considered.
    ++semaphoreIt;
    auto pipeControls = findAll<PIPE_CONTROL *>(semaphoreIt, cmdList.end());
    ASSERT_NE(0u, pipeControls.size());

    bool timestampWriteSeen = false;
    for (auto pcIt : pipeControls) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(*pcIt);
        if (pipeControl->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) {
            continue;
        }
        EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
        EXPECT_FALSE(pipeControl->getDcFlushEnable());
        EXPECT_EQ(pipeControl->getAddressHigh(), expectedAddressHigh);
        EXPECT_EQ(pipeControl->getAddress(), expectedAddressLow);
        timestampWriteSeen = true;
    }
    ASSERT_TRUE(timestampWriteSeen);
}
} // namespace ult
} // namespace L0

View File

@ -241,6 +241,48 @@ zeDeviceGetCommandQueueGroupProperties(
///< command queue group properties
);
///////////////////////////////////////////////////////////////////////////////
/// @brief Appends a memory write of the device's global timestamp value into a
/// command list.
///
/// @details
/// - The application must ensure the events are accessible by the device on
/// which the command list was created.
/// - The timestamp frequency can be queried from
/// ::ze_device_properties_t.timerResolution.
/// - The number of valid bits in the timestamp value can be queried from
/// ::ze_device_properties_t.timestampValidBits.
/// - The application must ensure the memory pointed to by dstptr is
/// accessible by the device on which the command list was created.
/// - The application must ensure the command list and events were created,
/// and the memory was allocated, on the same context.
/// - The application must **not** call this function from simultaneous
/// threads with the same command list handle.
/// - The implementation of this function should be lock-free.
///
/// @returns
/// - ::ZE_RESULT_SUCCESS
/// - ::ZE_RESULT_ERROR_UNINITIALIZED
/// - ::ZE_RESULT_ERROR_DEVICE_LOST
/// - ::ZE_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `nullptr == hCommandList`
/// - ::ZE_RESULT_ERROR_INVALID_NULL_POINTER
/// + `nullptr == dstptr`
/// - ::ZE_RESULT_ERROR_INVALID_SYNCHRONIZATION_OBJECT
/// - ::ZE_RESULT_ERROR_INVALID_SIZE
/// + `(nullptr == phWaitEvents) && (0 < numWaitEvents)`
ZE_APIEXPORT ze_result_t ZE_APICALL
zeCommandListAppendWriteGlobalTimestampExt(
ze_command_list_handle_t hCommandList, ///< [in] handle of the command list
uint64_t *dstptr, ///< [in,out] pointer to memory where the timestamp value will be written;
///< must be 8-byte aligned.
ze_event_handle_t hSignalEvent, ///< [in][optional] handle of the event to signal on completion
uint32_t numWaitEvents, ///< [in][optional] number of events to wait on before executing the
///< timestamp write; must be 0 if `nullptr == phWaitEvents`
ze_event_handle_t *phWaitEvents ///< [in][optional][range(0, numWaitEvents)] handles of the events to wait
///< on before executing the timestamp write
);
} //extern C
#endif // _ZE_API_EXT_H