fix: counter based event overflow handling

Source: 3291d25bb4

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-12-10 13:54:32 +00:00
committed by Compute-Runtime-Automation
parent 33246200ae
commit 59ed8c0f5b
11 changed files with 165 additions and 77 deletions

View File

@@ -200,15 +200,15 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderCounterOverflow(bool cop
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false,
isDualStreamCopyOffloadOperation(copyOffloadOperation));
inOrderExecInfo->resetCounterValue();
uint32_t newOffset = 0;
if (inOrderExecInfo->getAllocationOffset() == 0) {
// multitile immediate writes are uint64_t aligned
newOffset = alignUp(this->partitionCount * device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(), MemoryConstants::cacheLineSize * 4);
UNRECOVERABLE_IF(newOffset == 0);
}
inOrderExecInfo->setAllocationOffset(newOffset);
inOrderExecInfo->resetCounterValue();
inOrderExecInfo->initializeAllocationsFromHost();
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false, false); // signal counter on new offset
@@ -2970,7 +2970,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
}
if (hasInOrderDependencies()) {
if (inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue())) {
if (inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset())) {
this->latestOperationHasOptimizedCbEvent = false;
return false;
}
@@ -4697,7 +4697,7 @@ void CommandListCoreFamily<gfxCoreFamily>::patchInOrderCmds() {
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::hasInOrderDependencies() const {
return (inOrderExecInfo.get() && inOrderExecInfo->getCounterValue() > 0);
return (inOrderExecInfo.get() && inOrderExecInfo->getCounterValue() > inOrderExecInfo->getInitialCounterValue());
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -1215,9 +1215,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
uint64_t inOrderSyncValue = this->inOrderExecInfo.get() ? inOrderExecInfo->getCounterValue() : 0;
if (inOrderWaitAllowed) {
if (inOrderWaitAllowed && !inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset())) {
status = synchronizeInOrderExecution(timeout, (waitQueue == this->cmdQImmediateCopyOffload));
} else {
} else if (!inOrderWaitAllowed) {
const int64_t timeoutInMicroSeconds = timeout / 1000;
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, false, timeoutInMicroSeconds}, waitTaskCount);
@@ -1230,7 +1230,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
if (status != ZE_RESULT_NOT_READY) {
if (isInOrderExecutionEnabled()) {
inOrderExecInfo->setLastWaitedCounterValue(inOrderSyncValue);
inOrderExecInfo->setLastWaitedCounterValue(inOrderSyncValue, inOrderExecInfo->getAllocationOffset());
}
if (this->isTbxMode && (status == ZE_RESULT_SUCCESS)) {

View File

@@ -695,7 +695,7 @@ void Event::unsetInOrderExecInfo() {
void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount) {
if (inOrderIncrementValue == 0 || !newNode) {
for (auto &node : inOrderTimestampNode) {
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
}
inOrderTimestampNode.clear();
@@ -720,7 +720,7 @@ void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t par
} else if (resetAggregatedEvent) {
// If we are resetting aggregated event, we need to clear all additional timestamp nodes
for (auto &node : additionalTimestampNode) {
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
}
additionalTimestampNode.clear();
}
@@ -731,7 +731,7 @@ void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t par
for (auto &node : additionalTimestampNode) {
if (inOrderExecInfo) {
// Push to temp node vector and releaseNotUsedTempTimestampNodes will clear when needed
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
} else {
node->returnTag();
}

View File

@@ -305,7 +305,7 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue, this->getInOrderAllocationOffset())) {
bool signaled = true;
if (this->optimizedCbEvent) {
@@ -327,7 +327,7 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
if (!signaled) {
return ZE_RESULT_NOT_READY;
}
inOrderExecInfo->setLastWaitedCounterValue(waitValue);
inOrderExecInfo->setLastWaitedCounterValue(waitValue, this->getInOrderAllocationOffset());
}
handleSuccessfulHostSynchronization();
@@ -770,7 +770,7 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
if (this->optimizedCbEvent) {
synchronizeTimestampCompletionWithTimeout();
if (this->isTimestampPopulated()) {
inOrderExecInfo->setLastWaitedCounterValue(getInOrderExecSignalValueWithSubmissionCounter());
inOrderExecInfo->setLastWaitedCounterValue(getInOrderExecSignalValueWithSubmissionCounter(), this->getInOrderAllocationOffset());
handleSuccessfulHostSynchronization();
ret = ZE_RESULT_SUCCESS;
this->optimizedCbEvent = false;

View File

@@ -328,25 +328,24 @@ HWTEST_F(InOrderCmdListTests, givenCounterBasedEventsWhenHostWaitsAreCalledThenL
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
auto counterValue = events[1]->inOrderExecSignalValue;
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue, 0));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
// setting lower counter ignored
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1, 0);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue, 0));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
status = events[0]->hostSynchronize(-1);
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
// setting offset disables mechanism
inOrderExecInfo->setAllocationOffset(4u);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
completeHostAddress<FamilyType::gfxCoreFamily, WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>>(immCmdList.get());
}
@@ -529,9 +528,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCounterBasedTimestampEven
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, event3->toHandle(), 0, nullptr, launchParams);
event3->hostEventSetValue(Event::STATE_CLEARED);
event1->getInOrderExecInfo()->setLastWaitedCounterValue(2);
event2->getInOrderExecInfo()->setLastWaitedCounterValue(2);
event3->getInOrderExecInfo()->setLastWaitedCounterValue(3);
event1->getInOrderExecInfo()->setLastWaitedCounterValue(2, 0);
event2->getInOrderExecInfo()->setLastWaitedCounterValue(2, 0);
event3->getInOrderExecInfo()->setLastWaitedCounterValue(3, 0);
EXPECT_EQ(ZE_RESULT_SUCCESS, event1->queryStatus());
EXPECT_EQ(ZE_RESULT_SUCCESS, event2->queryStatus());
@@ -1781,7 +1780,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenImmediateCmdListWhenDispa
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
}
if (!events[0]->inOrderTimestampNode.empty()) {
copyOnlyCmdList->inOrderExecInfo->pushTempTimestampNode(events[0]->inOrderTimestampNode[0], events[0]->inOrderExecSignalValue);
copyOnlyCmdList->inOrderExecInfo->pushTempTimestampNode(events[0]->inOrderTimestampNode[0], events[0]->inOrderExecSignalValue, 0);
}
events[0]->inOrderTimestampNode.clear();
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
@@ -5206,12 +5205,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenCallingSyn
ultCsr->forceReturnGpuHang = false;
forceFail = false;
callCounter = 0;
immCmdList->getInOrderExecInfo()->addCounterValue(1);
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max(), false));
EXPECT_EQ(downloadedAlloc, expectedAlloc);
EXPECT_EQ(failCounter, callCounter);
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(1u, *hostAddress);
EXPECT_EQ(failCounter + 1, callCounter);
EXPECT_EQ(failCounter, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(2u, *hostAddress);
}
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
@@ -5299,6 +5299,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenDebugFlagSetWhenCallingSy
// success
{
immCmdList->getInOrderExecInfo()->addCounterValue(1);
ultCsr->checkGpuHangDetectedCalled = 0;
ultCsr->forceReturnGpuHang = false;
forceFail = false;
@@ -5306,9 +5308,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenDebugFlagSetWhenCallingSy
EXPECT_EQ(downloadedAlloc, hostAlloc);
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max(), false));
EXPECT_EQ(failCounter, callCounter);
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(1u, *hostAddress);
EXPECT_EQ(failCounter + 1, callCounter);
EXPECT_EQ(failCounter, ultCsr->checkGpuHangDetectedCalled);
EXPECT_EQ(2u, *hostAddress);
}
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);

View File

@@ -4948,12 +4948,12 @@ HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventCounterBasedUsedCreatedO
event2->eventPoolAllocation = nullptr;
auto inOrderExecInfo0 = NEO::InOrderExecInfo::create(device->getDeviceInOrderCounterAllocator()->getTag(), nullptr, *device->getNEODevice(), 1, false);
inOrderExecInfo0->setLastWaitedCounterValue(1);
inOrderExecInfo0->setLastWaitedCounterValue(1, 0);
event0->updateInOrderExecState(inOrderExecInfo0, 1, 0);
uint64_t counter = 2;
auto inOrderExecInfo1 = NEO::InOrderExecInfo::createFromExternalAllocation(*device->getNEODevice(), nullptr, 0x1, nullptr, &counter, 1, 1, 1);
inOrderExecInfo1->setLastWaitedCounterValue(1);
inOrderExecInfo1->setLastWaitedCounterValue(1, 0);
event1->updateInOrderExecState(inOrderExecInfo1, 1, 0);
MockGraphicsAllocation mockAlloc(rootDeviceIndex, nullptr, 1);

View File

@@ -268,6 +268,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1:
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value")
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled for immediate in-order cmd lists, 2: Enabled for all types. If enabled, all compute cmdlist will try to offload copy operations to copy engine")
DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased")
DECLARE_DEBUG_VARIABLE(int64_t, InitialCounterBasedEventValue, -1, "-1: default, >=0: initial value set during counter creation")
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")

View File

@@ -73,7 +73,9 @@ InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *ho
deviceAddress = deviceCounterNode->getGpuAddress();
}
isTbx = device.getDefaultEngine().commandStreamReceiver->isTbxMode();
auto csr = device.getDefaultEngine().commandStreamReceiver;
isTbx = csr->isTbxMode();
immWritePostSyncWriteOffset = std::max(csr->getImmWritePostSyncWriteOffset(), static_cast<uint32_t>(sizeof(uint64_t)));
reset();
}
@@ -98,20 +100,28 @@ void InOrderExecInfo::uploadToTbx(TagNodeBase &node, size_t size) {
}
void InOrderExecInfo::initializeAllocationsFromHost() {
const uint64_t initialValue = getInitialCounterValue();
if (deviceCounterNode) {
const size_t deviceAllocationWriteSize = sizeof(uint64_t) * numDevicePartitionsToWait;
memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize);
for (uint32_t i = 0; i < numDevicePartitionsToWait; i++) {
uint64_t *ptr = reinterpret_cast<uint64_t *>(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset + (i * immWritePostSyncWriteOffset)));
*ptr = initialValue;
}
if (isTbx) {
const size_t deviceAllocationWriteSize = alignUp(sizeof(uint64_t), immWritePostSyncWriteOffset) * numDevicePartitionsToWait;
uploadToTbx(*deviceCounterNode, deviceAllocationWriteSize);
}
}
if (hostCounterNode) {
const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait;
memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize);
for (uint32_t i = 0; i < numHostPartitionsToWait; i++) {
uint64_t *ptr = reinterpret_cast<uint64_t *>(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset + (i * immWritePostSyncWriteOffset)));
*ptr = initialValue;
}
if (isTbx) {
const size_t hostAllocationWriteSize = alignUp(sizeof(uint64_t), immWritePostSyncWriteOffset) * numHostPartitionsToWait;
uploadToTbx(*hostCounterNode, hostAllocationWriteSize);
}
}
@@ -125,6 +135,11 @@ void InOrderExecInfo::reset() {
initializeAllocationsFromHost();
}
// Resets the counter to the value returned by getInitialCounterValue() and overwrites the
// last-waited value stored for the currently selected allocation offset.
void InOrderExecInfo::resetCounterValue() {
counterValue = getInitialCounterValue();
// The boolean index picks slot 0 when allocationOffset == 0 and slot 1 otherwise;
// only that slot is rewritten here, the other slot keeps its previous value.
lastWaitedCounterValue[allocationOffset != 0].store(getInitialCounterValue());
}
NEO::GraphicsAllocation *InOrderExecInfo::getDeviceCounterAllocation() const {
if (externalDeviceAllocation) {
return externalDeviceAllocation;
@@ -143,19 +158,20 @@ uint64_t InOrderExecInfo::getBaseHostGpuAddress() const {
return hostCounterNode->getGpuAddress();
}
void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value) {
void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value, uint32_t allocationOffset) {
std::unique_lock<std::mutex> lock(mutex);
tempTimestampNodes.emplace_back(node, value);
tempTimestampNodes.emplace_back(node, std::make_pair(value, allocationOffset));
}
void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) {
std::unique_lock<std::mutex> lock(mutex);
std::vector<std::pair<TagNodeBase *, uint64_t>> tempVector;
std::vector<std::pair<TagNodeBase *, CounterAndOffsetPairT>> tempVector;
for (auto &node : tempTimestampNodes) {
if (forceReturn || lastWaitedCounterValue >= node.second) {
const auto &counterAndOffsetPair = node.second;
if (forceReturn || isCounterAlreadyDone(counterAndOffsetPair.first, counterAndOffsetPair.second)) {
node.first->returnTag();
} else {
tempVector.push_back(node);
@@ -179,4 +195,8 @@ uint64_t InOrderExecInfo::getDeviceNodeGpuAddress() const {
return 0;
}
// Returns the starting value for the in-order counter, read from the
// InitialCounterBasedEventValue debug flag.
// NOTE(review): presumably getIfNotDefault yields the flag value when it was overridden
// and the passed fallback (0) otherwise - confirm against the debug-variable helper.
uint64_t InOrderExecInfo::getInitialCounterValue() const {
return debugManager.flags.InitialCounterBasedEventValue.getIfNotDefault<uint64_t>(0);
}
} // namespace NEO

View File

@@ -8,6 +8,7 @@
#pragma once
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/mt_helpers.h"
#include "shared/source/helpers/non_copyable_or_moveable.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_type.h"
@@ -82,7 +83,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
uint64_t getCounterValue() const { return counterValue; }
void addCounterValue(uint64_t addValue) { counterValue += addValue; }
void resetCounterValue() { counterValue = 0; }
void resetCounterValue();
uint64_t getRegularCmdListSubmissionCounter() const { return regularCmdListSubmissionCounter; }
void addRegularCmdListSubmissionCounter(uint64_t addValue) { regularCmdListSubmissionCounter += addValue; }
@@ -100,23 +101,27 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
void reset();
bool isExternalMemoryExecInfo() const { return deviceCounterNode == nullptr; }
void setLastWaitedCounterValue(uint64_t value) {
void setLastWaitedCounterValue(uint64_t value, uint32_t allocationOffset) {
if (!isExternalMemoryExecInfo()) {
lastWaitedCounterValue = std::max(value, lastWaitedCounterValue);
NEO::MultiThreadHelpers::interlockedMax(lastWaitedCounterValue[allocationOffset != 0], value);
}
}
bool isCounterAlreadyDone(uint64_t waitValue) const {
return lastWaitedCounterValue >= waitValue && this->allocationOffset == 0u;
bool isCounterAlreadyDone(uint64_t waitValue, uint32_t allocationOffset) const {
return lastWaitedCounterValue[allocationOffset != 0] >= waitValue;
}
NEO::GraphicsAllocation *getExternalHostAllocation() const { return externalHostAllocation; }
NEO::GraphicsAllocation *getExternalDeviceAllocation() const { return externalDeviceAllocation; }
void pushTempTimestampNode(TagNodeBase *node, uint64_t value);
void pushTempTimestampNode(TagNodeBase *node, uint64_t value, uint32_t allocationOffset);
void releaseNotUsedTempTimestampNodes(bool forceReturn);
uint64_t getInitialCounterValue() const;
protected:
using CounterAndOffsetPairT = std::pair<uint64_t, uint32_t>;
void uploadToTbx(TagNodeBase &node, size_t size);
NEO::Device &device;
@@ -124,12 +129,12 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
NEO::TagNodeBase *hostCounterNode = nullptr;
NEO::GraphicsAllocation *externalHostAllocation = nullptr;
NEO::GraphicsAllocation *externalDeviceAllocation = nullptr;
std::vector<std::pair<NEO::TagNodeBase *, uint64_t>> tempTimestampNodes;
std::vector<std::pair<NEO::TagNodeBase *, CounterAndOffsetPairT>> tempTimestampNodes;
std::mutex mutex;
std::atomic<uint64_t> lastWaitedCounterValue[2] = {0, 0}; // [0] for offset == 0, [1] for offset != 0
uint64_t counterValue = 0;
uint64_t lastWaitedCounterValue = 0;
uint64_t regularCmdListSubmissionCounter = 0;
uint64_t deviceAddress = 0;
uint64_t *hostAddress = nullptr;
@@ -137,6 +142,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
uint32_t numHostPartitionsToWait = 0;
uint32_t allocationOffset = 0;
uint32_t rootDeviceIndex = 0;
uint32_t immWritePostSyncWriteOffset = 0;
bool regularCmdList = false;
bool duplicatedHostStorage = false;
bool atomicDeviceSignalling = false;

View File

@@ -673,4 +673,5 @@ LimitIsaPrefetchSize = -1
EnableUsmAllocationPoolManager = -1
ForceTotalWMTPDataSize = -1
DirectSubmissionInitialSemaphoreValue = -1
InitialCounterBasedEventValue = -1
# Please don't edit below this line

View File

@@ -120,10 +120,10 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
{
MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, mockDevice, 1, false, false);
inOrderExecInfo.lastWaitedCounterValue = 0;
inOrderExecInfo.lastWaitedCounterValue[0] = 0;
inOrderExecInfo.pushTempTimestampNode(node0, 1);
inOrderExecInfo.pushTempTimestampNode(node1, 2);
inOrderExecInfo.pushTempTimestampNode(node0, 1, 0);
inOrderExecInfo.pushTempTimestampNode(node1, 2, 0);
EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size());
@@ -133,7 +133,7 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
inOrderExecInfo.lastWaitedCounterValue = 1;
inOrderExecInfo.lastWaitedCounterValue[0] = 1;
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
EXPECT_EQ(1u, inOrderExecInfo.tempTimestampNodes.size());
EXPECT_EQ(node1, inOrderExecInfo.tempTimestampNodes[0].first);
@@ -141,7 +141,7 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
inOrderExecInfo.lastWaitedCounterValue = 2;
inOrderExecInfo.lastWaitedCounterValue[0] = 2;
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
EXPECT_EQ(0u, inOrderExecInfo.tempTimestampNodes.size());
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
@@ -153,8 +153,8 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
inOrderExecInfo.pushTempTimestampNode(node0, 3);
inOrderExecInfo.pushTempTimestampNode(node1, 4);
inOrderExecInfo.pushTempTimestampNode(node0, 3, 0);
inOrderExecInfo.pushTempTimestampNode(node1, 4, 0);
}
// forced release on destruction
@@ -162,6 +162,64 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1));
}
// Checks that, with InitialCounterBasedEventValue set, both the device and host counter
// storage hold the initial value for every partition after creation and again after reset(),
// and that a value above the initial one is not reported as already done after reset().
HWTEST_F(CommandEncoderTests, givenDebugFlagSetWhenHandlingTheCounterThenUseInitialValue) {
// Restores debug flags modified below when the test scope ends.
DebugManagerStateRestore restore;
constexpr uint64_t initialValue = 16;
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
debugManager.flags.InOrderAtomicSignallingEnabled.set(0);
debugManager.flags.InitialCounterBasedEventValue.set(static_cast<int64_t>(initialValue));
constexpr uint32_t partitionCount = 2u;
MockDevice mockDevice;
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, mockDevice.getMemoryManager());
// Distance used below to step from one partition's counter slot to the next.
const auto immWritePartitionOffset = ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset();
// initialize
auto deviceNode = deviceTagAllocator.getTag();
auto hostNode = hostTagAllocator.getTag();
auto devicePtrBase = reinterpret_cast<uint64_t *>(deviceNode->getCpuBase());
auto hostPtrBase = reinterpret_cast<uint64_t *>(hostNode->getCpuBase());
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, partitionCount, false);
// Right after creation every partition slot is expected to contain the initial value.
for (uint32_t i = 0; i < partitionCount; i++) {
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
EXPECT_EQ(initialValue, *devicePtr);
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
EXPECT_EQ(initialValue, *hostPtr);
}
// update
// Overwrite the slots from the host so the later reset() has something to restore.
for (uint32_t i = 0; i < partitionCount; i++) {
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
*devicePtr = initialValue + 10;
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
*hostPtr = initialValue + 20;
}
// Record a waited value above the initial one for offset 0; the final expectation checks it does not survive reset().
inOrderExecInfo->setLastWaitedCounterValue(initialValue + 5, 0);
// reset
inOrderExecInfo->reset();
for (uint32_t i = 0; i < partitionCount; i++) {
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
EXPECT_EQ(initialValue, *devicePtr);
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
EXPECT_EQ(initialValue, *hostPtr);
}
// initialValue + 1 was covered by the waited value set before reset(); it must no longer count as done.
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(initialValue + 1, 0));
}
HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) {
MockDevice mockDevice;
@@ -296,29 +354,29 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIs
auto node = tagAllocator.getTag();
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(node, nullptr, mockDevice, 2, true, false);
inOrderExecInfo->setLastWaitedCounterValue(1u);
inOrderExecInfo->setLastWaitedCounterValue(1u, 0);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
inOrderExecInfo->setLastWaitedCounterValue(0u);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
inOrderExecInfo->setLastWaitedCounterValue(0u, 0);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
inOrderExecInfo->setLastWaitedCounterValue(3u);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(2u));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(3u));
inOrderExecInfo->setLastWaitedCounterValue(3u, 0);
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(3u, 0));
inOrderExecInfo->setAllocationOffset(4u);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(3u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 4));
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(3u, 4));
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 4));
inOrderExecInfo = std::make_unique<InOrderExecInfo>(nullptr, nullptr, mockDevice, 2, true, false);
inOrderExecInfo->setLastWaitedCounterValue(2);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(1));
inOrderExecInfo->setLastWaitedCounterValue(2, 0);
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(1, 0));
}
HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenResetCalledThenUploadToTbx) {