fix: Reset additional properties usage after profiling

Related-To: NEO-13003

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
Signed-off-by: Young Jin Yoon <young.jin.yoon@intel.com>
This commit is contained in:
Young Jin Yoon 2025-05-10 00:43:18 +00:00 committed by Compute-Runtime-Automation
parent 0765b13a10
commit 4763d7268f
10 changed files with 129 additions and 31 deletions

View File

@ -4375,7 +4375,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleCounterBasedEventOperations(Eve
auto tag = device->getInOrderTimestampAllocator()->getTag();
this->commandContainer.addToResidencyContainer(tag->getBaseGraphicsAllocation()->getGraphicsAllocation(device->getRootDeviceIndex()));
signalEvent->resetInOrderTimestampNode(tag, this->partitionCount, false);
signalEvent->resetInOrderTimestampNode(tag, this->partitionCount);
}
}

View File

@ -334,7 +334,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
if (!compactEvent || this->asMutable() || !compactEvent->isCounterBased() || compactEvent->isUsingContextEndOffset()) {
if (inOrderNonWalkerSignalling) {
if (!eventForInOrderExec->getAllocation(this->device)) {
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag(), this->partitionCount, false);
eventForInOrderExec->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag(), this->partitionCount);
}
if (this->asMutable() || !eventForInOrderExec->isCounterBased()) {
dispatchEventPostSyncOperation(eventForInOrderExec, nullptr, launchParams.outListCommands, Event::STATE_CLEARED, false, false, false, false, false);

View File

@ -533,7 +533,7 @@ void Event::releaseTempInOrderTimestampNodes() {
}
ze_result_t Event::destroy() {
resetInOrderTimestampNode(nullptr, 0, false);
resetInOrderTimestampNode(nullptr, 0);
releaseTempInOrderTimestampNodes();
if (isCounterBasedExplicitlyEnabled() && isFromIpcPool) {
@ -677,13 +677,13 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) {
}
void Event::unsetInOrderExecInfo() {
resetInOrderTimestampNode(nullptr, 0, false);
resetInOrderTimestampNode(nullptr, 0);
inOrderExecInfo.reset();
inOrderAllocationOffset = 0;
inOrderExecSignalValue = 0;
}
void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount, bool blitAdditionalPropertiesUsed) {
void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount) {
if (inOrderIncrementValue == 0 || !newNode) {
for (auto &node : inOrderTimestampNode) {
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
@ -696,15 +696,33 @@ void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partit
inOrderTimestampNode.push_back(newNode);
if (NEO::debugManager.flags.ClearStandaloneInOrderTimestampAllocation.get() != 0) {
clearLatestInOrderTimestampData(partitionCount);
}
if (blitAdditionalPropertiesUsed) {
this->blitAdditionalPropertiesUsed = true;
clearTimestampTagData(partitionCount, true, nullptr);
}
}
}
// Installs newNode as the event's additional timestamp node, or drops all
// tracked additional nodes when newNode is nullptr.
//
// newNode        - node to track; nullptr only empties the container.
// partitionCount - forwarded to clearTimestampTagData() when the node's data
//                  is cleared.
//
// Events with a non-zero inOrderIncrementValue (aggregated events) accumulate
// nodes and skip the data clear; every other event keeps only the newest node
// and has its tag data cleared.
//
// NOTE(review): the pointers removed by clear() are not released or recycled
// here - confirm that node ownership is handled elsewhere.
void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount) {
    const bool accumulatesNodes = inOrderIncrementValue > 0;

    // Previously tracked nodes survive only when appending to an aggregated event.
    if (!newNode || !accumulatesNodes) {
        additionalTimestampNode.clear();
    }

    if (!newNode) {
        return;
    }

    additionalTimestampNode.push_back(newNode);

    if (!accumulatesNodes) {
        clearTimestampTagData(partitionCount, false, newNode);
    }
}
// Returns the most recently added additional timestamp node, or nullptr when
// no additional node is tracked.
NEO::TagNodeBase *Event::getEventAdditionalTimestampNode() {
    if (additionalTimestampNode.empty()) {
        return nullptr;
    }
    return additionalTimestampNode.back();
}
NEO::GraphicsAllocation *Event::getExternalCounterAllocationFromAddress(uint64_t *address) const {
NEO::SvmAllocationData *allocData = nullptr;
if (!address || !device->getDriverHandle()->findAllocationDataForRange(address, sizeof(uint64_t), allocData)) {

View File

@ -326,7 +326,9 @@ struct Event : _ze_event_handle_t {
void setExternalInterruptId(uint32_t interruptId) { externalInterruptId = interruptId; }
void resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount, bool blitAdditionalPropertiesUsed);
void resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount);
void resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount);
NEO::TagNodeBase *getEventAdditionalTimestampNode();
bool hasInOrderTimestampNode() const { return !inOrderTimestampNode.empty(); }
@ -348,7 +350,7 @@ struct Event : _ze_event_handle_t {
void unsetCmdQueue();
void releaseTempInOrderTimestampNodes();
virtual void clearLatestInOrderTimestampData(uint32_t partitionCount) = 0;
virtual void clearTimestampTagData(uint32_t partitionCount, bool latestInorderData, NEO::TagNodeBase *newNode) = 0;
EventPool *eventPool = nullptr;
@ -388,7 +390,7 @@ struct Event : _ze_event_handle_t {
std::shared_ptr<NEO::InOrderExecInfo> inOrderExecInfo;
CommandQueue *latestUsedCmdQueue = nullptr;
std::vector<NEO::TagNodeBase *> inOrderTimestampNode;
bool blitAdditionalPropertiesUsed = false;
std::vector<NEO::TagNodeBase *> additionalTimestampNode;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;

View File

@ -72,7 +72,7 @@ struct EventImp : public Event {
ze_result_t queryCounterBasedEventStatus();
void handleSuccessfulHostSynchronization();
MOCKABLE_VIRTUAL ze_result_t hostEventSetValueTimestamps(State eventState);
void clearLatestInOrderTimestampData(uint32_t partitionCount) override;
void clearTimestampTagData(uint32_t partitionCount, bool latestInorderData, NEO::TagNodeBase *newNode) override;
MOCKABLE_VIRTUAL void assignKernelEventCompletionData(void *address);
void setRemainingPackets(TagSizeT eventVal, uint64_t nextPacketGpuVa, void *nextPacketAddress, uint32_t packetsAlreadySet);
void getSynchronizedKernelTimestamps(ze_synchronized_timestamp_result_ext_t *pSynchronizedTimestampsBuffer,

View File

@ -200,8 +200,8 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
numPackets *= static_cast<uint32_t>(inOrderTimestampNode.size());
}
if (blitAdditionalPropertiesUsed) {
numPackets += static_cast<uint32_t>(inOrderTimestampNode.size());
if (additionalTimestampNode.size() > 0) {
numPackets += static_cast<uint32_t>(additionalTimestampNode.size());
}
for (auto packetId = 0u; packetId < numPackets; packetId++) {
@ -228,8 +228,11 @@ ze_result_t EventImp<TagSizeT>::calculateProfilingData() {
}
template <typename TagSizeT>
void EventImp<TagSizeT>::clearLatestInOrderTimestampData(uint32_t partitionCount) {
auto node = inOrderTimestampNode.back();
void EventImp<TagSizeT>::clearTimestampTagData(uint32_t partitionCount, bool latestInorderData, NEO::TagNodeBase *newNode) {
auto node = newNode;
if (latestInorderData) {
node = inOrderTimestampNode.back();
}
auto hostAddress = node->getCpuBase();
auto deviceAddress = node->getGpuAddress();
@ -262,6 +265,28 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
address = ptrOffset(address, singlePacketSize);
}
// Account for additional timestamp nodes
uint32_t remainingPackets = static_cast<uint32_t>(additionalTimestampNode.size());
if (remainingPackets == 0) {
continue;
}
nodeId = 0;
uint32_t normalizedPacketId = 0;
for (uint32_t packetId = packetsToCopy; packetId < packetsToCopy + remainingPackets; packetId++) {
if (inOrderIncrementValue > 0) {
if (normalizedPacketId % kernelEventCompletionData[i].getPacketsUsed() == 0) {
address = additionalTimestampNode[nodeId++]->getCpuBase();
}
} else {
address = additionalTimestampNode[nodeId++]->getCpuBase();
}
kernelEventCompletionData[i].assignDataToAllTimestamps(packetId, address);
address = ptrOffset(address, singlePacketSize);
normalizedPacketId++;
}
}
}
@ -808,6 +833,7 @@ ze_result_t EventImp<TagSizeT>::reset() {
this->resetCompletionStatus();
this->resetDeviceCompletionData(false);
this->l3FlushAppliedOnKernel.reset();
this->resetAdditionalTimestampNode(nullptr, 0);
return ZE_RESULT_SUCCESS;
}

View File

@ -41,9 +41,10 @@ struct InOrderFixtureMockEvent : public EventImp<uint32_t> {
using EventImp<uint32_t>::externalInterruptId;
using EventImp<uint32_t>::latestUsedCmdQueue;
using EventImp<uint32_t>::inOrderTimestampNode;
using EventImp<uint32_t>::additionalTimestampNode;
void makeCounterBasedInitiallyDisabled(MultiGraphicsAllocation &poolAllocation) {
resetInOrderTimestampNode(nullptr, 0, false);
resetInOrderTimestampNode(nullptr, 0);
counterBasedMode = CounterBasedMode::initiallyDisabled;
resetCompletionStatus();
counterBasedFlags = 0;
@ -53,7 +54,7 @@ struct InOrderFixtureMockEvent : public EventImp<uint32_t> {
}
void makeCounterBasedImplicitlyDisabled(MultiGraphicsAllocation &poolAllocation) {
resetInOrderTimestampNode(nullptr, 0, false);
resetInOrderTimestampNode(nullptr, 0);
counterBasedMode = CounterBasedMode::implicitlyDisabled;
resetCompletionStatus();
counterBasedFlags = 0;

View File

@ -19,7 +19,7 @@ namespace ult {
template <>
struct WhiteBox<::L0::Event> : public ::L0::Event {
using BaseClass = ::L0::Event;
using BaseClass::blitAdditionalPropertiesUsed;
using BaseClass::additionalTimestampNode;
using BaseClass::contextEndTS;
using BaseClass::contextStartTS;
using BaseClass::counterBasedMode;
@ -45,7 +45,7 @@ using Event = WhiteBox<::L0::Event>;
template <typename TagSizeT>
struct WhiteBox<::L0::EventImp<TagSizeT>> : public L0::EventImp<TagSizeT> {
using BaseClass = ::L0::EventImp<TagSizeT>;
using BaseClass::blitAdditionalPropertiesUsed;
using BaseClass::additionalTimestampNode;
using BaseClass::calculateProfilingData;
using BaseClass::contextEndTS;
using BaseClass::contextStartTS;
@ -191,7 +191,7 @@ class MockEvent : public ::L0::Event {
ze_result_t hostEventSetValue(State eventState) override {
return ZE_RESULT_SUCCESS;
}
void clearLatestInOrderTimestampData(uint32_t partitionCount) override {}
void clearTimestampTagData(uint32_t partitionCount, bool latestInorderData, NEO::TagNodeBase *newNode) override {}
uint32_t getPacketsUsedInLastKernel() override { return 1; }
uint32_t getPacketsInUse() const override { return 1; }
void resetPackets(bool resetAllPackets) override {}

View File

@ -19,6 +19,7 @@
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_direct_submission_hw.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/mocks/mock_timestamp_container.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
@ -5499,6 +5500,28 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCreatingCounterBasedEv
context->freeMem(devAddress);
}
// Checks that an external-sync-storage event keeps every additional timestamp
// node it is given: after two resets with distinct tags the container holds
// two entries.
// NOTE(review): presumably the fixture propagates the increment value into the
// event's inOrderIncrementValue, which selects the accumulate path - confirm.
HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCreatingCounterBasedEventAndAdditionalTimestampNodeThenSetAdditionalParamsVectorCorrectly) {
    uint64_t initialCounter = 4;
    uint64_t incrementStep = 2;

    auto syncStorage = reinterpret_cast<uint64_t *>(allocDeviceMem(sizeof(uint64_t)));

    auto syncEvent = createExternalSyncStorageEvent(initialCounter, incrementStep, syncStorage);
    syncEvent->isTimestampEvent = true;
    ASSERT_NE(nullptr, syncEvent->getInOrderExecInfo());

    MockTagAllocator<DeviceAllocNodeType<true>> firstAllocator(0, neoDevice->getMemoryManager());
    MockTagAllocator<DeviceAllocNodeType<true>> secondAllocator(0, neoDevice->getMemoryManager());

    // Nothing is tracked before the first reset.
    EXPECT_EQ(nullptr, syncEvent->getEventAdditionalTimestampNode());

    auto firstTag = firstAllocator.getTag();
    syncEvent->resetAdditionalTimestampNode(firstTag, 1);
    EXPECT_NE(nullptr, syncEvent->getEventAdditionalTimestampNode());

    auto secondTag = secondAllocator.getTag();
    syncEvent->resetAdditionalTimestampNode(secondTag, 1);
    EXPECT_NE(nullptr, syncEvent->getEventAdditionalTimestampNode());

    // Both nodes must still be present.
    EXPECT_EQ(2u, syncEvent->additionalTimestampNode.size());

    context->freeMem(syncStorage);
}
HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCallingAppendThenDontResetInOrderExecInfo) {
uint64_t counterValue = 4;
uint64_t incValue = 2;

View File

@ -2820,9 +2820,8 @@ TEST_F(TimestampEventUsedPacketSignalCreate, givenEventWithBlitAdditionalPropert
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
event->resetInOrderTimestampNode(blitTagAllocator.getTag(), 1, true);
EXPECT_EQ(1u, event->inOrderTimestampNode.size());
ASSERT_TRUE(event->blitAdditionalPropertiesUsed);
event->resetAdditionalTimestampNode(blitTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
event->setPacketsInUse(2u);
@ -3914,8 +3913,7 @@ HWTEST_F(EventTests, GivenEventUsedOnNonDefaultCsrWhenHostSynchronizeCalledThenA
event->destroy();
}
HWTEST_F(EventTests, givenInOrderEventWhenCallingResetInOrderTimestampNodeWithBlitAdditionalPropertiesUsedThenBlitAdditionalPropertiesUsedIsSet) {
std::map<GraphicsAllocation *, uint32_t> downloadAllocationTrack;
HWTEST_F(EventTests, givenInOrderEventWhenCallingResetAdditionalTimestampNodeWithTagAllocatorThenTagAddedToAdditionalTimestampNodeVector) {
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> eventTagAllocator(0, neoDevice->getMemoryManager());
@ -3927,9 +3925,39 @@ HWTEST_F(EventTests, givenInOrderEventWhenCallingResetInOrderTimestampNodeWithBl
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
EXPECT_FALSE(event->blitAdditionalPropertiesUsed);
event->resetInOrderTimestampNode(eventTagAllocator.getTag(), 1, true);
EXPECT_TRUE(event->blitAdditionalPropertiesUsed);
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
}
// Checks that a regular event keeps only the latest additional timestamp
// node: two consecutive resets with fresh tags leave exactly one entry.
HWTEST_F(EventTests, givenRegularEventWhenCallingResetAdditionalTimestampNodeMultipleTimesWithTagAllocatorThenTagAddedToAdditionalTimestampNodeVectorOnce) {
    MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, neoDevice->getMemoryManager());

    auto regularEvent = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
    ASSERT_NE(regularEvent, nullptr);

    // Nothing is tracked before the first reset.
    EXPECT_EQ(nullptr, regularEvent->getEventAdditionalTimestampNode());

    // Each pass takes a new tag from the allocator and installs it on the event.
    for (auto pass = 0u; pass < 2u; pass++) {
        regularEvent->resetAdditionalTimestampNode(tagAllocator.getTag(), 1);
        EXPECT_NE(nullptr, regularEvent->getEventAdditionalTimestampNode());
    }

    EXPECT_EQ(1u, regularEvent->additionalTimestampNode.size());
}
// Checks that resetting with a null node empties the additional timestamp
// node container of a regular event.
HWTEST_F(EventTests, givenRegularEventWhenCallingResetAdditionalTimestampNodeWithNullptrThenVectorCleared) {
    MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, neoDevice->getMemoryManager());

    auto regularEvent = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
    ASSERT_NE(regularEvent, nullptr);

    // Nothing is tracked before the first reset.
    EXPECT_EQ(nullptr, regularEvent->getEventAdditionalTimestampNode());

    auto trackedTag = tagAllocator.getTag();
    regularEvent->resetAdditionalTimestampNode(trackedTag, 1);
    EXPECT_NE(nullptr, regularEvent->getEventAdditionalTimestampNode());

    // A null node requests a full clear.
    regularEvent->resetAdditionalTimestampNode(nullptr, 0);
    EXPECT_EQ(nullptr, regularEvent->getEventAdditionalTimestampNode());
    EXPECT_EQ(0u, regularEvent->additionalTimestampNode.size());
}
HWTEST_F(EventTests, givenInOrderEventWhenHostSynchronizeIsCalledThenAllocationIsDonwloadedOnlyAfterEventWasUsedOnGpu) {