From 71ff85cf2c4174317f28485464815371a9ec14a4 Mon Sep 17 00:00:00 2001 From: Aravind Gopalakrishnan Date: Wed, 8 Apr 2020 12:44:14 -0700 Subject: [PATCH] Complete kernel prior to timestamp capture While at it, fix event flag check upon creation Change-Id: I2c57b0e20141fc38a4de695cae79bf2f14dd6cdd Signed-off-by: Aravind Gopalakrishnan --- .../core/source/cmdlist/cmdlist_hw_base.inl | 14 ++- level_zero/core/source/event/event.cpp | 25 ++--- level_zero/core/source/event/event.h | 7 +- .../test_cmdlist_append_launch_kernel.cpp | 89 +++++++++++++++ .../unit_tests/sources/event/CMakeLists.txt | 10 ++ .../unit_tests/sources/event/test_event.cpp | 105 ++++++++++++++++++ 6 files changed, 224 insertions(+), 26 deletions(-) create mode 100644 level_zero/core/test/unit_tests/sources/event/CMakeLists.txt create mode 100644 level_zero/core/test/unit_tests/sources/event/test_event.cpp diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 447978efb2..08a2b298cb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -92,17 +92,12 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand commandContainer.addToResidencyContainer(&event->getAllocation()); if (beforeWalker) { - timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW); + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START); NEO::EncodeStoreMMIO::encode(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, timeStampAddress); - timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH); - NEO::EncodeStoreMMIO::encode(commandContainer, REG_GLOBAL_TIMESTAMP_UN, timeStampAddress); - timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_START); 
NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); } else { - timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); - NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; @@ -116,6 +111,13 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand 0llu, dcFlushEnable, device->getHwInfo()); + + timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); + NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); + + if (dcFlushEnable) { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + } } } } diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index e1f601e41f..faba5485a6 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -67,8 +67,8 @@ struct EventImp : public Event { EventPool *eventPool; protected: - ze_result_t hostEventSetValue(uint64_t eventValue); - ze_result_t hostEventSetValueTimestamps(uint64_t eventVal); + ze_result_t hostEventSetValue(uint32_t eventValue); + ze_result_t hostEventSetValueTimestamps(uint32_t eventVal); void makeAllocationResident(); }; @@ -81,7 +81,7 @@ struct EventPoolImp : public EventPool { } auto timestampMultiplier = 1; - if (flags == ZE_EVENT_POOL_FLAG_TIMESTAMP) { + if (flags & ZE_EVENT_POOL_FLAG_TIMESTAMP) { isEventPoolUsedForTimestamp = true; timestampMultiplier = numEventTimestampsToRead; } @@ -154,7 +154,7 @@ struct EventPoolImp : public EventPool { protected: const uint32_t eventSize = 16u; const uint32_t eventAlignment = 
MemoryConstants::cacheLineSize; - const int32_t numEventTimestampsToRead = 5u; + const int32_t numEventTimestampsToRead = 4u; }; Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *device) { @@ -206,7 +206,7 @@ void EventImp::makeAllocationResident() { } } -ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) { +ze_result_t EventImp::hostEventSetValueTimestamps(uint32_t eventVal) { for (uint32_t i = 0; i < this->eventPool->getNumEventTimestampsToRead(); i++) { auto baseAddr = reinterpret_cast(hostAddress); auto timeStampAddress = baseAddr + getOffsetOfEventTimestampRegister(i); @@ -224,7 +224,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(uint64_t eventVal) { return ZE_RESULT_SUCCESS; } -ze_result_t EventImp::hostEventSetValue(uint64_t eventVal) { +ze_result_t EventImp::hostEventSetValue(uint32_t eventVal) { if (isTimestampEvent) { hostEventSetValueTimestamps(eventVal); } @@ -292,8 +292,8 @@ ze_result_t EventImp::reset() { ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) { auto baseAddr = reinterpret_cast(hostAddress); uint64_t *tsptr = nullptr; - uint64_t tsData = Event::STATE_INITIAL; constexpr uint64_t tsMask = (1ull << 32) - 1; + uint64_t tsData = Event::STATE_INITIAL & tsMask; if (!this->isTimestampEvent) return ZE_RESULT_ERROR_INVALID_ARGUMENT; @@ -305,15 +305,8 @@ ze_result_t EventImp::getTimestamp(ze_event_timestamp_type_t timestampType, void } if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_START) { - tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_LOW)); - auto tsptrUpper = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START_HIGH)); - - tsData = ((*tsptrUpper & tsMask) << 32) | (*tsptr & tsMask); - memcpy_s(dstptr, sizeof(uint64_t), static_cast(&tsData), sizeof(uint64_t)); - return ZE_RESULT_SUCCESS; - } - - if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) { + tsptr = 
reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_START)); + } else if (timestampType == ZE_EVENT_TIMESTAMP_GLOBAL_END) { tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::GLOBAL_END)); } else if (timestampType == ZE_EVENT_TIMESTAMP_CONTEXT_START) { tsptr = reinterpret_cast(baseAddr + getOffsetOfEventTimestampRegister(Event::CONTEXT_START)); diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index f47839e064..09167882ca 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -30,15 +30,14 @@ struct Event : _ze_event_handle_t { virtual ze_result_t reset() = 0; virtual ze_result_t getTimestamp(ze_event_timestamp_type_t timestampType, void *dstptr) = 0; - enum State : uint64_t { + enum State : uint32_t { STATE_SIGNALED = 0u, - STATE_CLEARED = static_cast(-1), + STATE_CLEARED = static_cast(-1), STATE_INITIAL = STATE_CLEARED }; enum EventTimestampRegister : uint32_t { - GLOBAL_START_LOW = 0u, - GLOBAL_START_HIGH, + GLOBAL_START = 0u, GLOBAL_END, CONTEXT_START, CONTEXT_END diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp index 5df4c39520..2e88a3ac9f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/helpers/preamble.h" +#include "shared/source/helpers/register_offsets.h" #include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "opencl/source/helpers/hardware_commands_helper.h" @@ -232,6 +233,94 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandListAppendLaunchKernel, givenEventsWhenAppend } } +using TimestampEventSupport = IsWithinProducts; 
+HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernelThenSRMAndPCEncoded, TimestampEventSupport) { + using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM; + + Mock<::L0::Kernel> kernel; + std::unique_ptr commandList(L0::CommandList::create(productFamily, device, false)); + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + + auto eventPool = std::unique_ptr(EventPool::create(device, &eventPoolDesc)); + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + auto result = commandList->appendLaunchKernel( + kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + EXPECT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + EXPECT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + auto itor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + { + auto cmd = genCmdCast(*itor); + EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getRegisterAddress()); + } + itor++; + + itor = find(itor, cmdList.end()); + ASSERT_NE(cmdList.end(), itor); + { + auto cmd = genCmdCast(*itor); + EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress()); + } + itor++; + + itor = find(cmdList.begin(), cmdList.end()); + 
ASSERT_NE(cmdList.end(), itor); + itor++; + + auto itorPC = findAll(cmdList.begin(), cmdList.end()); + EXPECT_NE(0u, itorPC.size()); + bool postSyncFound = false; + for (auto it : itorPC) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP) { + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_FALSE(cmd->getDcFlushEnable()); + auto gpuAddress = event->getGpuAddress() + + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); + EXPECT_EQ(cmd->getAddressHigh(), gpuAddress >> 32u); + EXPECT_EQ(cmd->getAddress(), uint32_t(gpuAddress)); + postSyncFound = true; + } + } + EXPECT_TRUE(postSyncFound); + + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + { + auto cmd = genCmdCast(*itor); + EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getRegisterAddress()); + } + + { + auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()), + std::end(commandList->commandContainer.getResidencyContainer()), + &event->getAllocation()); + EXPECT_NE(itorEvent, std::end(commandList->commandContainer.getResidencyContainer())); + } +} + HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelThenKernelIsExecutedOnImmediateCmdQ, SklPlusMatcher) { createKernel(); diff --git a/level_zero/core/test/unit_tests/sources/event/CMakeLists.txt b/level_zero/core/test/unit_tests/sources/event/CMakeLists.txt new file mode 100644 index 0000000000..b5388d54c6 --- /dev/null +++ b/level_zero/core/test/unit_tests/sources/event/CMakeLists.txt @@ -0,0 +1,10 @@ +# +# Copyright (C) 2020 Intel Corporation +# +# SPDX-License-Identifier: MIT +# + +target_sources(${TARGET_NAME} PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt + ${CMAKE_CURRENT_SOURCE_DIR}/test_event.cpp +) diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp new file mode 100644 
index 0000000000..fd3c8bf1b6 --- /dev/null +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "test.h" + +#include "level_zero/core/source/driver/driver_handle_imp.h" +#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/mocks/mock_event.h" + +namespace L0 { +namespace ult { + +using EventPoolCreate = Test; + +TEST_F(EventPoolCreate, allocationContainsAtLeast16Bytes) { + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_HOST_VISIBLE, + 1}; + + std::unique_ptr eventPool(EventPool::create(device, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + auto allocation = &eventPool->getAllocation(); + ASSERT_NE(nullptr, allocation); + + uint32_t minAllocationSize = eventPool->getEventSize(); + EXPECT_GE(allocation->getUnderlyingBufferSize(), minAllocationSize); +} + +TEST_F(EventPoolCreate, givenTimestampEventsThenVerifyNumTimestampsToRead) { + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, // event pool is used for timestamp events + 1}; + + std::unique_ptr eventPool(EventPool::create(device, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + + uint32_t numTimestamps = 4u; + EXPECT_EQ(numTimestamps, eventPool->getNumEventTimestampsToRead()); +} + +class TimestampEventCreate : public Test { + public: + void SetUp() override { + DeviceFixture::SetUp(); + ze_event_pool_desc_t eventPoolDesc = { + ZE_EVENT_POOL_DESC_VERSION_CURRENT, + ZE_EVENT_POOL_FLAG_TIMESTAMP, + 1}; + + ze_event_desc_t eventDesc = { + ZE_EVENT_DESC_VERSION_CURRENT, + 0, + ZE_EVENT_SCOPE_FLAG_NONE, + ZE_EVENT_SCOPE_FLAG_NONE}; + + eventPool = std::unique_ptr(L0::EventPool::create(device, &eventPoolDesc)); + ASSERT_NE(nullptr, eventPool); + event = std::unique_ptr(L0::Event::create(eventPool.get(), 
&eventDesc, device)); + ASSERT_NE(nullptr, eventPool); + } + + void TearDown() override { + DeviceFixture::TearDown(); + } + + std::unique_ptr eventPool; + std::unique_ptr event; +}; + +TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventFlagSet) { + EXPECT_TRUE(event->isTimestampEvent); +} + +TEST_F(TimestampEventCreate, givenEventTimestampsNotTriggeredThenValuesInInitialState) { + uint64_t globalStart, globalEnd, contextStart, contextEnd; + + event->getTimestamp(ZE_EVENT_TIMESTAMP_GLOBAL_START, &globalStart); + event->getTimestamp(ZE_EVENT_TIMESTAMP_GLOBAL_END, &globalEnd); + event->getTimestamp(ZE_EVENT_TIMESTAMP_CONTEXT_START, &contextStart); + event->getTimestamp(ZE_EVENT_TIMESTAMP_CONTEXT_END, &contextEnd); + + EXPECT_EQ(static_cast(Event::STATE_CLEARED), globalStart); + EXPECT_EQ(static_cast(Event::STATE_CLEARED), globalEnd); + EXPECT_EQ(static_cast(Event::STATE_CLEARED), contextStart); + EXPECT_EQ(static_cast(Event::STATE_CLEARED), contextEnd); +} + +TEST_F(TimestampEventCreate, givenSingleTimestampEventThenAllocationSizeCreatedForAllTimestamps) { + auto allocation = &eventPool->getAllocation(); + ASSERT_NE(nullptr, allocation); + + uint32_t minTimestampEventAllocation = eventPool->getEventSize() * + eventPool->getNumEventTimestampsToRead(); + EXPECT_GE(minTimestampEventAllocation, allocation->getUnderlyingBufferSize()); +} + +} // namespace ult +} // namespace L0 \ No newline at end of file