From f727d26aa0e47c28209e6114fc2ad9a245775045 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Mon, 6 Sep 2021 15:39:17 +0000 Subject: [PATCH] Revert "SW WA to add PIPE_CONTROL with dcFlush enabled when event scope..." This reverts commit c051495eb8d453aee1d9bfaf0d2b8aa1301bb8e2 Signed-off-by: Mateusz Jablonski --- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 17 +-- level_zero/core/source/event/event.h | 7 +- level_zero/core/source/event/event_impl.inl | 114 ++++++++---------- .../unit_tests/sources/event/test_event.cpp | 26 ++-- 4 files changed, 70 insertions(+), 94 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 4372934ebd..e253e1c208 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -231,24 +231,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z this->partitionCount = std::max(partitionCount, this->partitionCount); if (hEvent) { auto event = Event::fromHandle(hEvent); - if (partitionCount > 1) { + if (isTimestampEvent && partitionCount > 1) { event->setPacketsInUse(partitionCount); } - if (L3FlushEnable) { - using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - auto &hwHelper = this->device->getHwHelper(); - eventAddress = event->getPacketAddress(this->device) + hwHelper.getSingleTimestampPacketSize(); - event->setPacketsInUse(event->getPacketsInUse() + 1); - - NEO::PipeControlArgs args; - args.dcFlushEnable = L3FlushEnable; - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - eventAddress, Event::STATE_SIGNALED, - commandContainer.getDevice()->getHardwareInfo(), - args); - } } if (neoDevice->getDebugger()) { diff 
--git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index ed4d6e1d65..16ed4003c2 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -95,7 +95,7 @@ struct Event : _ze_event_handle_t { }; template -class KernelEventCompletionData : public NEO::TimestampPackets { +class KernelTimestampsData : public NEO::TimestampPackets { public: uint32_t getPacketsUsed() const { return packetsUsed; } void setPacketsUsed(uint32_t value) { packetsUsed = value; } @@ -139,7 +139,7 @@ struct EventImp : public Event { size_t getSinglePacketSize() const override { return NEO::TimestampPackets::getSinglePacketSize(); }; ze_result_t hostEventSetValue(uint32_t eventValue) override; - std::unique_ptr[]> kernelEventCompletionData; + std::unique_ptr[]> kernelTimestampsData; Device *device; int index; @@ -148,9 +148,8 @@ struct EventImp : public Event { protected: ze_result_t calculateProfilingData(); ze_result_t queryStatusKernelTimestamp(); - ze_result_t queryStatusNonTimestamp(); ze_result_t hostEventSetValueTimestamps(TagSizeT eventVal); - void assignKernelEventCompletionData(void *address); + void assignTimestampData(void *address); }; struct EventPool : _ze_event_pool_handle_t { diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 0a05f74ed0..fbe0192347 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -15,8 +15,8 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device * if (eventPool->isEventPoolTimestampFlagSet()) { event->setEventTimestampFlag(true); + event->kernelTimestampsData = std::make_unique[]>(EventPacketsCount::maxKernelSplit); } - event->kernelEventCompletionData = std::make_unique[]>(EventPacketsCount::maxKernelSplit); auto alloc = eventPool->getAllocation().getGraphicsAllocation(device->getNEODevice()->getRootDeviceIndex()); @@ -49,24 +49,24 @@ 
NEO::GraphicsAllocation &EventImp::getAllocation(Device *device) { template ze_result_t EventImp::calculateProfilingData() { - globalStartTS = kernelEventCompletionData[0].getGlobalStartValue(0); - globalEndTS = kernelEventCompletionData[0].getGlobalEndValue(0); - contextStartTS = kernelEventCompletionData[0].getContextStartValue(0); - contextEndTS = kernelEventCompletionData[0].getContextEndValue(0); + globalStartTS = kernelTimestampsData[0].getGlobalStartValue(0); + globalEndTS = kernelTimestampsData[0].getGlobalEndValue(0); + contextStartTS = kernelTimestampsData[0].getContextStartValue(0); + contextEndTS = kernelTimestampsData[0].getContextEndValue(0); for (uint32_t i = 0; i < kernelCount; i++) { - for (auto packetId = 0u; packetId < kernelEventCompletionData[i].getPacketsUsed(); packetId++) { - if (globalStartTS > kernelEventCompletionData[i].getGlobalStartValue(packetId)) { - globalStartTS = kernelEventCompletionData[i].getGlobalStartValue(packetId); + for (auto packetId = 0u; packetId < kernelTimestampsData[i].getPacketsUsed(); packetId++) { + if (globalStartTS > kernelTimestampsData[i].getGlobalStartValue(packetId)) { + globalStartTS = kernelTimestampsData[i].getGlobalStartValue(packetId); } - if (contextStartTS > kernelEventCompletionData[i].getContextStartValue(packetId)) { - contextStartTS = kernelEventCompletionData[i].getContextStartValue(packetId); + if (contextStartTS > kernelTimestampsData[i].getContextStartValue(packetId)) { + contextStartTS = kernelTimestampsData[i].getContextStartValue(packetId); } - if (contextEndTS < kernelEventCompletionData[i].getContextEndValue(packetId)) { - contextEndTS = kernelEventCompletionData[i].getContextEndValue(packetId); + if (contextEndTS < kernelTimestampsData[i].getContextEndValue(packetId)) { + contextEndTS = kernelTimestampsData[i].getContextEndValue(packetId); } - if (globalEndTS < kernelEventCompletionData[i].getGlobalEndValue(packetId)) { - globalEndTS = 
kernelEventCompletionData[i].getGlobalEndValue(packetId); + if (globalEndTS < kernelTimestampsData[i].getGlobalEndValue(packetId)) { + globalEndTS = kernelTimestampsData[i].getGlobalEndValue(packetId); } } } @@ -75,12 +75,11 @@ ze_result_t EventImp::calculateProfilingData() { } template -void EventImp::assignKernelEventCompletionData(void *address) { +void EventImp::assignTimestampData(void *address) { for (uint32_t i = 0; i < kernelCount; i++) { - uint32_t packetsToCopy = 0; - packetsToCopy = kernelEventCompletionData[i].getPacketsUsed(); + uint32_t packetsToCopy = kernelTimestampsData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCopy; packetId++) { - kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address); + kernelTimestampsData[i].assignDataToAllTimestamps(packetId, address); address = ptrOffset(address, NEO::TimestampPackets::getSinglePacketSize()); } } @@ -88,27 +87,11 @@ void EventImp::assignKernelEventCompletionData(void *address) { template ze_result_t EventImp::queryStatusKernelTimestamp() { - assignKernelEventCompletionData(hostAddress); - uint32_t queryVal = Event::STATE_CLEARED; + assignTimestampData(hostAddress); for (uint32_t i = 0; i < kernelCount; i++) { - uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); + uint32_t packetsToCheck = kernelTimestampsData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) { - return ZE_RESULT_NOT_READY; - } - } - } - return ZE_RESULT_SUCCESS; -} - -template -ze_result_t EventImp::queryStatusNonTimestamp() { - assignKernelEventCompletionData(hostAddress); - uint32_t queryVal = Event::STATE_CLEARED; - for (uint32_t i = 0; i < kernelCount; i++) { - uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); - for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - if 
(kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) { + if (kernelTimestampsData[i].getContextEndValue(packetId) == Event::STATE_CLEARED) { return ZE_RESULT_NOT_READY; } } @@ -119,6 +102,7 @@ ze_result_t EventImp::queryStatusNonTimestamp() { template ze_result_t EventImp::queryStatus() { uint64_t *hostAddr = static_cast(hostAddress); + uint32_t queryVal = Event::STATE_CLEARED; if (metricStreamer != nullptr) { *hostAddr = metricStreamer->getNotificationState(); @@ -126,9 +110,9 @@ ze_result_t EventImp::queryStatus() { this->csr->downloadAllocations(); if (isEventTimestampFlagSet()) { return queryStatusKernelTimestamp(); - } else { - return queryStatusNonTimestamp(); } + memcpy_s(static_cast(&queryVal), sizeof(uint32_t), static_cast(hostAddr), sizeof(uint32_t)); + return (queryVal == Event::STATE_CLEARED) ? ZE_RESULT_NOT_READY : ZE_RESULT_SUCCESS; } template @@ -146,7 +130,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(TagSizeT eventVal) { } }; for (uint32_t i = 0; i < kernelCount; i++) { - uint32_t packetsToSet = kernelEventCompletionData[i].getPacketsUsed(); + uint32_t packetsToSet = kernelTimestampsData[i].getPacketsUsed(); for (uint32_t j = 0; j < packetsToSet; j++) { eventTsSetFunc(baseAddr + NEO::TimestampPackets::getContextStartOffset()); eventTsSetFunc(baseAddr + NEO::TimestampPackets::getGlobalStartOffset()); @@ -155,7 +139,7 @@ ze_result_t EventImp::hostEventSetValueTimestamps(TagSizeT eventVal) { baseAddr += NEO::TimestampPackets::getSinglePacketSize(); } } - assignKernelEventCompletionData(hostAddress); + assignTimestampData(hostAddress); return ZE_RESULT_SUCCESS; } @@ -224,12 +208,14 @@ ze_result_t EventImp::reset() { if (isEventTimestampFlagSet()) { kernelCount = EventPacketsCount::maxKernelSplit; for (uint32_t i = 0; i < kernelCount; i++) { - kernelEventCompletionData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); + 
kernelTimestampsData[i].setPacketsUsed(NEO::TimestampPacketSizeControl::preferredPacketCount); + } + hostEventSetValue(Event::STATE_INITIAL); + resetPackets(); + return ZE_RESULT_SUCCESS; + } else { + return hostEventSetValue(Event::STATE_INITIAL); } - hostEventSetValue(Event::STATE_INITIAL); - resetPackets(); - return ZE_RESULT_SUCCESS; } template @@ -241,7 +227,7 @@ ze_result_t EventImp::queryKernelTimestamp(ze_kernel_timestamp_result_ return ZE_RESULT_NOT_READY; } - assignKernelEventCompletionData(hostAddress); + assignTimestampData(hostAddress); calculateProfilingData(); auto eventTsSetFunc = [&](uint64_t &timestampFieldToCopy, uint64_t &timestampFieldForWriting) { @@ -280,7 +266,7 @@ ze_result_t EventImp::queryTimestampsExp(Device *device, uint32_t *pCo } if ((*pCount == 0) || - (*pCount > kernelEventCompletionData[timestampPacket].getPacketsUsed())) { + (*pCount > kernelTimestampsData[timestampPacket].getPacketsUsed())) { *pCount = this->getPacketsInUse(); return ZE_RESULT_SUCCESS; } @@ -292,10 +278,10 @@ ze_result_t EventImp::queryTimestampsExp(Device *device, uint32_t *pCo memcpy_s(&timestampFieldForWriting, sizeof(uint64_t), static_cast(&timestampFieldToCopy), sizeof(uint64_t)); }; - globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId); - contextStartTs = kernelEventCompletionData[timestampPacket].getContextStartValue(packetId); - contextEndTs = kernelEventCompletionData[timestampPacket].getContextEndValue(packetId); - globalEndTs = kernelEventCompletionData[timestampPacket].getGlobalEndValue(packetId); + globalStartTs = kernelTimestampsData[timestampPacket].getGlobalStartValue(packetId); + contextStartTs = kernelTimestampsData[timestampPacket].getContextStartValue(packetId); + contextEndTs = kernelTimestampsData[timestampPacket].getContextEndValue(packetId); + globalEndTs = kernelTimestampsData[timestampPacket].getGlobalEndValue(packetId); queryTsEventAssignFunc(result.global.kernelStart, globalStartTs);
queryTsEventAssignFunc(result.context.kernelStart, contextStartTs); @@ -309,31 +295,37 @@ ze_result_t EventImp::queryTimestampsExp(Device *device, uint32_t *pCo template void EventImp::resetPackets() { for (uint32_t i = 0; i < kernelCount; i++) { - kernelEventCompletionData[i].setPacketsUsed(1); + kernelTimestampsData[i].setPacketsUsed(1); } kernelCount = 1; } template uint32_t EventImp::getPacketsInUse() { - uint32_t packetsInUse = 0; - for (uint32_t i = 0; i < kernelCount; i++) { - packetsInUse += kernelEventCompletionData[i].getPacketsUsed(); + if (isEventTimestampFlagSet()) { + uint32_t packetsInUse = 0; + for (uint32_t i = 0; i < kernelCount; i++) { + packetsInUse += kernelTimestampsData[i].getPacketsUsed(); + }; + return packetsInUse; + } else { + return 1; } - return packetsInUse; } template void EventImp::setPacketsInUse(uint32_t value) { - kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value); -} + kernelTimestampsData[getCurrKernelDataIndex()].setPacketsUsed(value); +}; template uint64_t EventImp::getPacketAddress(Device *device) { uint64_t address = getGpuAddress(device); - for (uint32_t i = 0; i < kernelCount - 1; i++) { - address += kernelEventCompletionData[i].getPacketsUsed() * - NEO::TimestampPackets::getSinglePacketSize(); + if (isEventTimestampFlagSet() && kernelCount > 1) { + for (uint32_t i = 0; i < kernelCount - 1; i++) { + address += kernelTimestampsData[i].getPacketsUsed() * + NEO::TimestampPackets::getSinglePacketSize(); + } } return address; } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 2354d16b03..0d3080ec73 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -640,15 +640,15 @@ TEST_F(TimestampEventCreate, givenEventCreatedWithTimestampThenIsTimestampEventF } TEST_F(TimestampEventCreate, 
givenEventTimestampsCreatedWhenResetIsInvokeThenCorrectDataAreSet) { - EXPECT_NE(nullptr, event->kernelEventCompletionData); + EXPECT_NE(nullptr, event->kernelTimestampsData); for (auto j = 0u; j < EventPacketsCount::maxKernelSplit; j++) { for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) { - EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextStartValue(i)); - EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalStartValue(i)); - EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getContextEndValue(i)); - EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelEventCompletionData[j].getGlobalEndValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextStartValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalStartValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getContextEndValue(i)); + EXPECT_EQ(static_cast(Event::State::STATE_INITIAL), event->kernelTimestampsData[j].getGlobalEndValue(i)); } - EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); + EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed()); } EXPECT_EQ(1u, event->kernelCount); @@ -692,7 +692,7 @@ TEST_F(TimestampEventCreate, givenEventTimestampWhenPacketCountIsSetThenCorrectO } TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrectDataAreSet) { - EXPECT_NE(nullptr, event->kernelEventCompletionData); + EXPECT_NE(nullptr, event->kernelTimestampsData); event->hostSignal(); ze_result_t result = event->queryStatus(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); @@ -702,12 +702,12 @@ TEST_F(TimestampEventCreate, givenEventWhenSignaledAndResetFromTheHostThenCorrec EXPECT_EQ(ZE_RESULT_NOT_READY, result); for (auto j = 0u; j < 
EventPacketsCount::maxKernelSplit; j++) { for (auto i = 0u; i < NEO::TimestampPacketSizeControl::preferredPacketCount; i++) { - EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextStartValue(i)); - EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalStartValue(i)); - EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getContextEndValue(i)); - EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelEventCompletionData[j].getGlobalEndValue(i)); + EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextStartValue(i)); + EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalStartValue(i)); + EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getContextEndValue(i)); + EXPECT_EQ(Event::State::STATE_INITIAL, event->kernelTimestampsData[j].getGlobalEndValue(i)); } - EXPECT_EQ(1u, event->kernelEventCompletionData[j].getPacketsUsed()); + EXPECT_EQ(1u, event->kernelTimestampsData[j].getPacketsUsed()); } EXPECT_EQ(1u, event->kernelCount); } @@ -828,7 +828,7 @@ TEST_F(TimestampEventCreate, givenEventWhenQueryingTimestampExpThenCorrectDataSe uint32_t pCount = 2; for (uint32_t packetId = 0; packetId < pCount; packetId++) { - event->kernelEventCompletionData[0].assignDataToAllTimestamps(packetId, event->hostAddress); + event->kernelTimestampsData[0].assignDataToAllTimestamps(packetId, event->hostAddress); event->hostAddress = ptrOffset(event->hostAddress, NEO::TimestampPackets::getSinglePacketSize()); }