fix: Fix additional timestamp node logic

Related-To: NEO-13003

- assign additional node only if needed
- ensure kernelEventCompletionData packet count is correct
- add ULT for multi-tile timestamp event
- remove unneeded function

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
Aravind Gopalakrishnan
2025-05-13 22:15:57 +00:00
committed by Compute-Runtime-Automation
parent 85ed1a15e4
commit fd2a073fc6
13 changed files with 91 additions and 56 deletions

View File

@@ -337,7 +337,7 @@ struct CommandListCoreFamily : public CommandListImp {
void addFlushRequiredCommand(bool flushOperationRequired, Event *signalEvent, bool copyOperation, bool flushL3InPipeControl);
void handlePostSubmissionState();
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent);
MOCKABLE_VIRTUAL void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, bool useAdditionalTimestamp);
void setupFillKernelArguments(size_t baseOffset,
size_t patternSize,

View File

@@ -1440,8 +1440,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
commandContainer.addToResidencyContainer(srcPtrAlloc);
commandContainer.addToResidencyContainer(clearColorAllocation);
size_t nBlitsPerRow = NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(blitProperties.copySize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed);
bool useAdditionalTimestamp = nBlitsPerRow > 1;
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent);
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
}
NEO::BlitPropertiesContainer blitPropertiesContainer{blitProperties};
@@ -1497,13 +1499,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
const bool copyOnly = isCopyOnly(dualStreamCopyOffload);
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
size_t nBlits = copyRegionPreferred ? NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyRegion(blitProperties.copySize, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed) : NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(blitProperties.copySize, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
bool useAdditionalTimestamp = nBlits > 1;
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent);
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
} else if (copyOnly) {
appendEventForProfiling(signalEvent, nullptr, true, false, false, true);
}
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironmentRef();
bool copyRegionPreferred = NEO::BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(copySizeModified, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed);
NEO::BlitCommandsResult blitResult{};
if (copyRegionPreferred) {
blitResult = NEO::BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(blitProperties, *commandContainer.getCommandStream(), rootDeviceEnvironment);
@@ -1549,8 +1554,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
commandContainer.addToResidencyContainer(src);
commandContainer.addToResidencyContainer(clearColorAllocation);
bool useAdditionalTimestamp = blitProperties.copySize.z > 1;
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent);
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
} else {
appendEventForProfiling(signalEvent, nullptr, true, false, false, true);
}
@@ -2484,7 +2490,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
auto blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset);
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent);
setAdditionalBlitProperties(blitProperties, signalEvent, false);
}
blitProperties.computeStreamPartitionCount = this->partitionCount;

View File

@@ -15,7 +15,7 @@ constexpr bool CommandListCoreFamily<gfxCoreFamily>::checkIfAllocationImportedRe
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent) {
void CommandListCoreFamily<gfxCoreFamily>::setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, bool useAdditionalTimestamp) {
}
} // namespace L0

View File

@@ -535,6 +535,7 @@ void Event::releaseTempInOrderTimestampNodes() {
ze_result_t Event::destroy() {
resetInOrderTimestampNode(nullptr, 0);
releaseTempInOrderTimestampNodes();
resetAdditionalTimestampNode(nullptr, 0);
if (isCounterBasedExplicitlyEnabled() && isFromIpcPool) {
auto memoryManager = device->getNEODevice()->getMemoryManager();
@@ -703,6 +704,9 @@ void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partit
void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount) {
if (!newNode) {
for (auto &node : additionalTimestampNode) {
node->returnTag();
}
additionalTimestampNode.clear();
return;
}
@@ -713,16 +717,16 @@ void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t par
return;
}
additionalTimestampNode.clear();
if (additionalTimestampNode.size() > 0) {
auto existingNode = additionalTimestampNode.back();
existingNode->returnTag();
additionalTimestampNode.clear();
}
additionalTimestampNode.push_back(newNode);
clearTimestampTagData(partitionCount, false, newNode);
}
// Returns the most recently stored additional timestamp node, or nullptr when
// no additional node has been assigned to this event.
NEO::TagNodeBase *Event::getEventAdditionalTimestampNode() {
    if (additionalTimestampNode.empty()) {
        return nullptr;
    }
    return additionalTimestampNode.back();
}
NEO::GraphicsAllocation *Event::getExternalCounterAllocationFromAddress(uint64_t *address) const {
NEO::SvmAllocationData *allocData = nullptr;
if (!address || !device->getDriverHandle()->findAllocationDataForRange(address, sizeof(uint64_t), allocData)) {

View File

@@ -161,6 +161,7 @@ struct Event : _ze_event_handle_t {
virtual void resetKernelCountAndPacketUsedCount() = 0;
void *getHostAddress() const;
virtual void setPacketsInUse(uint32_t value) = 0;
virtual void setAdditionalPacketsInUse(uint32_t value) = 0;
uint32_t getCurrKernelDataIndex() const { return kernelCount - 1; }
MOCKABLE_VIRTUAL void setGpuStartTimestamp();
MOCKABLE_VIRTUAL void setGpuEndTimestamp();
@@ -328,7 +329,6 @@ struct Event : _ze_event_handle_t {
void resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount);
void resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount);
NEO::TagNodeBase *getEventAdditionalTimestampNode();
bool hasInOrderTimestampNode() const { return !inOrderTimestampNode.empty(); }

View File

@@ -18,9 +18,12 @@ class KernelEventCompletionData : public NEO::TimestampPackets<TagSizeT, NEO::Ti
public:
// Accessors for the packet count of this kernel's completion data; the count starts at 1.
uint32_t getPacketsUsed() const { return packetsUsed; }
void setPacketsUsed(uint32_t value) { packetsUsed = value; }
// Accessors for the additional packet count, tracked separately from packetsUsed; the count starts at 0.
// EventImp::assignKernelEventCompletionData reads this value when accounting for additional timestamp nodes.
uint32_t getAdditionalPacketsUsed() const { return additionalPacketsUsed; }
void setAdditionalPacketsUsed(uint32_t value) { additionalPacketsUsed = value; }
protected:
// Packet count for this kernel; defaults to a single packet.
uint32_t packetsUsed = 1;
// Additional packet count for this kernel; zero means no additional packets are accounted for.
uint32_t additionalPacketsUsed = 0;
};
template <typename TagSizeT>
@@ -52,6 +55,7 @@ struct EventImp : public Event {
uint32_t getPacketsInUse() const override;
uint32_t getPacketsUsedInLastKernel() override;
void setPacketsInUse(uint32_t value) override;
void setAdditionalPacketsInUse(uint32_t value) override;
ze_result_t hostEventSetValue(State eventState) override;
std::unique_ptr<KernelEventCompletionData<TagSizeT>[]> kernelEventCompletionData;

View File

@@ -267,7 +267,14 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
}
// Account for additional timestamp nodes
uint32_t remainingPackets = static_cast<uint32_t>(additionalTimestampNode.size());
uint32_t remainingPackets = 0;
if (!additionalTimestampNode.empty()) {
remainingPackets = kernelEventCompletionData[i].getAdditionalPacketsUsed();
if (inOrderIncrementValue > 0) {
remainingPackets *= static_cast<uint32_t>(additionalTimestampNode.size());
}
}
if (remainingPackets == 0) {
continue;
}
@@ -275,11 +282,7 @@ void EventImp<TagSizeT>::assignKernelEventCompletionData(void *address) {
nodeId = 0;
uint32_t normalizedPacketId = 0;
for (uint32_t packetId = packetsToCopy; packetId < packetsToCopy + remainingPackets; packetId++) {
if (inOrderIncrementValue > 0) {
if (normalizedPacketId % kernelEventCompletionData[i].getPacketsUsed() == 0) {
address = additionalTimestampNode[nodeId++]->getCpuBase();
}
} else {
if (normalizedPacketId % kernelEventCompletionData[i].getPacketsUsed() == 0) {
address = additionalTimestampNode[nodeId++]->getCpuBase();
}
@@ -1086,6 +1089,11 @@ void EventImp<TagSizeT>::setPacketsInUse(uint32_t value) {
kernelEventCompletionData[getCurrKernelDataIndex()].setPacketsUsed(value);
}
// Stores the additional packet count on the completion data of the kernel
// selected by getCurrKernelDataIndex().
template <typename TagSizeT>
void EventImp<TagSizeT>::setAdditionalPacketsInUse(uint32_t value) {
    auto &currentKernelData = kernelEventCompletionData[getCurrKernelDataIndex()];
    currentKernelData.setAdditionalPacketsUsed(value);
}
template <typename TagSizeT>
void EventImp<TagSizeT>::resetKernelCountAndPacketUsedCount() {
for (auto i = 0u; i < this->kernelCount; i++) {

View File

@@ -197,6 +197,7 @@ class MockEvent : public ::L0::Event {
void resetPackets(bool resetAllPackets) override {}
void resetKernelCountAndPacketUsedCount() override {}
void setPacketsInUse(uint32_t value) override {}
void setAdditionalPacketsInUse(uint32_t value) override {}
uint64_t getPacketAddress(L0::Device *) override { return 0; }
void setGpuStartTimestamp() override {}
void setGpuEndTimestamp() override {}

View File

@@ -108,7 +108,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
return ZE_RESULT_SUCCESS;
}
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent) override {}
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, bool useAdditionalTimestamp) override {}
ze_result_t appendMemoryCopyKernel2d(AlignedAllocationData *dstAlignedAllocation, AlignedAllocationData *srcAlignedAllocation,
Builtin builtin, const ze_copy_region_t *dstRegion,
uint32_t dstPitch, size_t dstOffset,

View File

@@ -445,14 +445,14 @@ HWTEST_F(AppendMemoryCopyTests, givenBlitPropertiesWhenCallingSetAdditionalBlitP
auto commandList = std::make_unique<MockCommandListForAdditionalBlitProperties<FamilyType::gfxCoreFamily>>();
EXPECT_FALSE(commandList->useAdditionalBlitProperties);
commandList->setAdditionalBlitProperties(blitProperties, nullptr);
commandList->setAdditionalBlitProperties(blitProperties, nullptr, false);
EXPECT_EQ(postSyncArgs.isTimestampEvent, postSyncArgsExpected.isTimestampEvent);
EXPECT_EQ(postSyncArgs.postSyncImmValue, postSyncArgsExpected.postSyncImmValue);
EXPECT_EQ(postSyncArgs.interruptEvent, postSyncArgsExpected.interruptEvent);
EXPECT_EQ(postSyncArgs.eventAddress, postSyncArgsExpected.eventAddress);
commandList->useAdditionalBlitProperties = true;
commandList->setAdditionalBlitProperties(blitProperties2, nullptr);
commandList->setAdditionalBlitProperties(blitProperties2, nullptr, false);
EXPECT_EQ(postSyncArgs2.isTimestampEvent, postSyncArgsExpected.isTimestampEvent);
EXPECT_EQ(postSyncArgs2.postSyncImmValue, postSyncArgsExpected.postSyncImmValue);
EXPECT_EQ(postSyncArgs2.interruptEvent, postSyncArgsExpected.interruptEvent);
@@ -465,9 +465,9 @@ class MockCommandListForAdditionalBlitProperties2 : public WhiteBox<::L0::Comman
public:
using BaseClass = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;
using BaseClass::useAdditionalBlitProperties;
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent) override {
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, bool useAdditionalTimestamp) override {
additionalBlitPropertiesCalled++;
BaseClass::setAdditionalBlitProperties(blitProperties, signalEvent);
BaseClass::setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
}
void appendSignalInOrderDependencyCounter(Event *signalEvent, bool copyOffloadOperation, bool stall, bool textureFlushRequired) override {
appendSignalInOrderDependencyCounterCalled++;

View File

@@ -128,7 +128,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
return ZE_RESULT_SUCCESS;
}
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent) override {}
void setAdditionalBlitProperties(NEO::BlitProperties &blitProperties, Event *signalEvent, bool useAdditionalTimestamp) override {}
ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,
AlignedAllocationData *dstAllocationData,

View File

@@ -5509,14 +5509,12 @@ HWTEST_F(InOrderCmdListTests, givenExternalSyncStorageWhenCreatingCounterBasedEv
event->isTimestampEvent = true;
ASSERT_NE(nullptr, event->getInOrderExecInfo());
MockTagAllocator<DeviceAllocNodeType<true>> eventTagAllocator0(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> eventTagAllocator1(0, neoDevice->getMemoryManager());
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator0.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
auto node0 = device->getDeviceInOrderCounterAllocator()->getTag();
event->resetAdditionalTimestampNode(eventTagAllocator1.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(node0, 1);
auto node1 = device->getDeviceInOrderCounterAllocator()->getTag();
event->resetAdditionalTimestampNode(node1, 1);
EXPECT_EQ(2u, event->additionalTimestampNode.size());
context->freeMem(devAddress);

View File

@@ -2821,7 +2821,6 @@ TEST_F(TimestampEventUsedPacketSignalCreate, givenEventWithBlitAdditionalPropert
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
event->resetAdditionalTimestampNode(blitTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
event->setPacketsInUse(2u);
@@ -3485,6 +3484,42 @@ TEST_F(EventTests, givenRegularEventUseMultiplePacketsWhenHostSignalThenExpectAl
}
}
// Creates an event, checks its initial completion state and packet count, then
// verifies that setting zero additional packets is reported by the first
// kernel's completion data.
TEST_F(EventTests, givenRegularEventWithoutAdditionalPacketsThenGetAdditionalPacketsRetursZero) {
    eventDesc.index = 0;
    eventDesc.signal = 0;
    eventDesc.wait = 0;

    using EventType = L0::EventImp<uint32_t>;
    auto *createdEvent = L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device);
    std::unique_ptr<EventType> event{static_cast<EventType *>(createdEvent)};
    ASSERT_NE(event, nullptr);

    auto *completionField = static_cast<uint32_t *>(event->getCompletionFieldHostAddress());
    EXPECT_EQ(*completionField, Event::STATE_INITIAL);
    EXPECT_EQ(1u, event->getPacketsInUse());

    event->setAdditionalPacketsInUse(0u);
    const auto &firstKernelData = event->kernelEventCompletionData[0];
    EXPECT_EQ(firstKernelData.getAdditionalPacketsUsed(), 0u);
}
// Creates an event, checks its initial completion state and packet count, then
// verifies that setting one additional packet is reported by the first
// kernel's completion data.
TEST_F(EventTests, givenRegularEventUseOneAdditionalPacketsThenGetAdditionalPacketsRetursOne) {
    eventDesc.index = 0;
    eventDesc.signal = 0;
    eventDesc.wait = 0;

    using EventType = L0::EventImp<uint32_t>;
    auto *createdEvent = L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device);
    std::unique_ptr<EventType> event{static_cast<EventType *>(createdEvent)};
    ASSERT_NE(event, nullptr);

    auto *completionField = static_cast<uint32_t *>(event->getCompletionFieldHostAddress());
    EXPECT_EQ(*completionField, Event::STATE_INITIAL);
    EXPECT_EQ(1u, event->getPacketsInUse());

    event->setAdditionalPacketsInUse(1u);
    const auto &firstKernelData = event->kernelEventCompletionData[0];
    EXPECT_EQ(firstKernelData.getAdditionalPacketsUsed(), 1u);
}
TEST_F(EventUsedPacketSignalTests, givenEventUseMultiplePacketsWhenHostSignalThenExpectAllPacketsAreSignaled) {
eventDesc.index = 0;
eventDesc.signal = 0;
@@ -3913,22 +3948,6 @@ HWTEST_F(EventTests, GivenEventUsedOnNonDefaultCsrWhenHostSynchronizeCalledThenA
event->destroy();
}
HWTEST_F(EventTests, givenInOrderEventWhenCallingResetAdditionalTimestampNodeWithTagAllocatorThenTagAddedToAdditionalTimestampNodeVector) {
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> eventTagAllocator(0, neoDevice->getMemoryManager());
auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
ASSERT_NE(event, nullptr);
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(deviceTagAllocator.getTag(), nullptr, *neoDevice, 1, false, false);
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);
event->updateInOrderExecState(inOrderExecInfo, 1, 0);
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
}
HWTEST_F(EventTests, givenRegularEventWhenCallingResetAdditionalTimestampNodeMultipleTimesWithTagAllocatorThenTagAddedToAdditionalTimestampNodeVectorOnce) {
@@ -3936,12 +3955,9 @@ HWTEST_F(EventTests, givenRegularEventWhenCallingResetAdditionalTimestampNodeMul
auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
ASSERT_NE(event, nullptr);
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
EXPECT_EQ(1u, event->additionalTimestampNode.size());
}
@@ -3951,12 +3967,10 @@ HWTEST_F(EventTests, givenRegularEventWhenCallingResetAdditionalTimestampNodeWit
auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
ASSERT_NE(event, nullptr);
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
event->resetAdditionalTimestampNode(eventTagAllocator.getTag(), 1);
EXPECT_NE(nullptr, event->getEventAdditionalTimestampNode());
EXPECT_EQ(1u, event->additionalTimestampNode.size());
event->resetAdditionalTimestampNode(nullptr, 0);
EXPECT_EQ(nullptr, event->getEventAdditionalTimestampNode());
EXPECT_EQ(0u, event->additionalTimestampNode.size());
}