mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 22:08:53 +08:00
fix: counter based event overflow handling
Source: 3291d25bb4
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
33246200ae
commit
59ed8c0f5b
@@ -200,15 +200,15 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderCounterOverflow(bool cop
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderExecInfo, nullptr, inOrderExecInfo->getCounterValue() + 1, inOrderExecInfo->getAllocationOffset(), false, true, false, false,
|
||||
isDualStreamCopyOffloadOperation(copyOffloadOperation));
|
||||
|
||||
inOrderExecInfo->resetCounterValue();
|
||||
|
||||
uint32_t newOffset = 0;
|
||||
if (inOrderExecInfo->getAllocationOffset() == 0) {
|
||||
// multitile immediate writes are uint64_t aligned
|
||||
newOffset = alignUp(this->partitionCount * device->getL0GfxCoreHelper().getImmediateWritePostSyncOffset(), MemoryConstants::cacheLineSize * 4);
|
||||
UNRECOVERABLE_IF(newOffset == 0);
|
||||
}
|
||||
|
||||
inOrderExecInfo->setAllocationOffset(newOffset);
|
||||
inOrderExecInfo->resetCounterValue();
|
||||
inOrderExecInfo->initializeAllocationsFromHost();
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(nullptr, copyOffloadOperation, false, false, false); // signal counter on new offset
|
||||
@@ -2970,7 +2970,7 @@ bool CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
|
||||
}
|
||||
|
||||
if (hasInOrderDependencies()) {
|
||||
if (inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue())) {
|
||||
if (inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset())) {
|
||||
this->latestOperationHasOptimizedCbEvent = false;
|
||||
return false;
|
||||
}
|
||||
@@ -4697,7 +4697,7 @@ void CommandListCoreFamily<gfxCoreFamily>::patchInOrderCmds() {
|
||||
}
|
||||
// Reports whether this command list currently has in-order dependencies:
// inOrderExecInfo must be set and its counter value must exceed a threshold.
// NOTE(review): this span is a rendered diff without +/- markers. The first
// return (threshold 0) is presumably the removed line and the second return
// (threshold inOrderExecInfo->getInitialCounterValue()) its replacement --
// confirm against the repository before relying on either.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::hasInOrderDependencies() const {
|
||||
return (inOrderExecInfo.get() && inOrderExecInfo->getCounterValue() > 0);
|
||||
return (inOrderExecInfo.get() && inOrderExecInfo->getCounterValue() > inOrderExecInfo->getInitialCounterValue());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -1215,9 +1215,9 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
|
||||
uint64_t inOrderSyncValue = this->inOrderExecInfo.get() ? inOrderExecInfo->getCounterValue() : 0;
|
||||
|
||||
if (inOrderWaitAllowed) {
|
||||
if (inOrderWaitAllowed && !inOrderExecInfo->isCounterAlreadyDone(inOrderExecInfo->getCounterValue(), inOrderExecInfo->getAllocationOffset())) {
|
||||
status = synchronizeInOrderExecution(timeout, (waitQueue == this->cmdQImmediateCopyOffload));
|
||||
} else {
|
||||
} else if (!inOrderWaitAllowed) {
|
||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
|
||||
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, false, timeoutInMicroSeconds}, waitTaskCount);
|
||||
@@ -1230,7 +1230,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
||||
|
||||
if (status != ZE_RESULT_NOT_READY) {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
inOrderExecInfo->setLastWaitedCounterValue(inOrderSyncValue);
|
||||
inOrderExecInfo->setLastWaitedCounterValue(inOrderSyncValue, inOrderExecInfo->getAllocationOffset());
|
||||
}
|
||||
|
||||
if (this->isTbxMode && (status == ZE_RESULT_SUCCESS)) {
|
||||
|
||||
@@ -695,7 +695,7 @@ void Event::unsetInOrderExecInfo() {
|
||||
void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode, uint32_t partitionCount) {
|
||||
if (inOrderIncrementValue == 0 || !newNode) {
|
||||
for (auto &node : inOrderTimestampNode) {
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
|
||||
}
|
||||
|
||||
inOrderTimestampNode.clear();
|
||||
@@ -720,7 +720,7 @@ void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t par
|
||||
} else if (resetAggregatedEvent) {
|
||||
// If we are resetting aggregated event, we need to clear all additional timestamp nodes
|
||||
for (auto &node : additionalTimestampNode) {
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
|
||||
}
|
||||
additionalTimestampNode.clear();
|
||||
}
|
||||
@@ -731,7 +731,7 @@ void Event::resetAdditionalTimestampNode(NEO::TagNodeBase *newNode, uint32_t par
|
||||
for (auto &node : additionalTimestampNode) {
|
||||
if (inOrderExecInfo) {
|
||||
// Push to temp node vector and releaseNotUsedTempTimestampNodes will clear when needed
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue);
|
||||
inOrderExecInfo->pushTempTimestampNode(node, inOrderExecSignalValue, this->getInOrderAllocationOffset());
|
||||
} else {
|
||||
node->returnTag();
|
||||
}
|
||||
|
||||
@@ -305,7 +305,7 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
||||
|
||||
auto waitValue = getInOrderExecSignalValueWithSubmissionCounter();
|
||||
|
||||
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue)) {
|
||||
if (!inOrderExecInfo->isCounterAlreadyDone(waitValue, this->getInOrderAllocationOffset())) {
|
||||
bool signaled = true;
|
||||
|
||||
if (this->optimizedCbEvent) {
|
||||
@@ -327,7 +327,7 @@ ze_result_t EventImp<TagSizeT>::queryCounterBasedEventStatus() {
|
||||
if (!signaled) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
inOrderExecInfo->setLastWaitedCounterValue(waitValue);
|
||||
inOrderExecInfo->setLastWaitedCounterValue(waitValue, this->getInOrderAllocationOffset());
|
||||
}
|
||||
|
||||
handleSuccessfulHostSynchronization();
|
||||
@@ -770,7 +770,7 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
||||
if (this->optimizedCbEvent) {
|
||||
synchronizeTimestampCompletionWithTimeout();
|
||||
if (this->isTimestampPopulated()) {
|
||||
inOrderExecInfo->setLastWaitedCounterValue(getInOrderExecSignalValueWithSubmissionCounter());
|
||||
inOrderExecInfo->setLastWaitedCounterValue(getInOrderExecSignalValueWithSubmissionCounter(), this->getInOrderAllocationOffset());
|
||||
handleSuccessfulHostSynchronization();
|
||||
ret = ZE_RESULT_SUCCESS;
|
||||
this->optimizedCbEvent = false;
|
||||
|
||||
@@ -328,25 +328,24 @@ HWTEST_F(InOrderCmdListTests, givenCounterBasedEventsWhenHostWaitsAreCalledThenL
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
|
||||
|
||||
auto counterValue = events[1]->inOrderExecSignalValue;
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue, 0));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
|
||||
|
||||
// setting lower counter ignored
|
||||
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1);
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||
inOrderExecInfo->setLastWaitedCounterValue(counterValue - 1, 0);
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(events[0]->inOrderExecSignalValue, 0));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
|
||||
|
||||
status = events[0]->hostSynchronize(-1);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, status);
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue + 1, 0));
|
||||
|
||||
// setting offset disables mechanism
|
||||
inOrderExecInfo->setAllocationOffset(4u);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(counterValue));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(counterValue, 0));
|
||||
|
||||
completeHostAddress<FamilyType::gfxCoreFamily, WhiteBox<L0::CommandListCoreFamilyImmediate<FamilyType::gfxCoreFamily>>>(immCmdList.get());
|
||||
}
|
||||
@@ -529,9 +528,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenCounterBasedTimestampEven
|
||||
cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, event3->toHandle(), 0, nullptr, launchParams);
|
||||
event3->hostEventSetValue(Event::STATE_CLEARED);
|
||||
|
||||
event1->getInOrderExecInfo()->setLastWaitedCounterValue(2);
|
||||
event2->getInOrderExecInfo()->setLastWaitedCounterValue(2);
|
||||
event3->getInOrderExecInfo()->setLastWaitedCounterValue(3);
|
||||
event1->getInOrderExecInfo()->setLastWaitedCounterValue(2, 0);
|
||||
event2->getInOrderExecInfo()->setLastWaitedCounterValue(2, 0);
|
||||
event3->getInOrderExecInfo()->setLastWaitedCounterValue(3, 0);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, event1->queryStatus());
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, event2->queryStatus());
|
||||
@@ -1781,7 +1780,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenImmediateCmdListWhenDispa
|
||||
EXPECT_EQ(Event::CounterBasedMode::implicitlyEnabled, events[0]->counterBasedMode);
|
||||
}
|
||||
if (!events[0]->inOrderTimestampNode.empty()) {
|
||||
copyOnlyCmdList->inOrderExecInfo->pushTempTimestampNode(events[0]->inOrderTimestampNode[0], events[0]->inOrderExecSignalValue);
|
||||
copyOnlyCmdList->inOrderExecInfo->pushTempTimestampNode(events[0]->inOrderTimestampNode[0], events[0]->inOrderExecSignalValue, 0);
|
||||
}
|
||||
events[0]->inOrderTimestampNode.clear();
|
||||
events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation());
|
||||
@@ -5206,12 +5205,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenInOrderModeWhenCallingSyn
|
||||
ultCsr->forceReturnGpuHang = false;
|
||||
forceFail = false;
|
||||
callCounter = 0;
|
||||
immCmdList->getInOrderExecInfo()->addCounterValue(1);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max(), false));
|
||||
EXPECT_EQ(downloadedAlloc, expectedAlloc);
|
||||
|
||||
EXPECT_EQ(failCounter, callCounter);
|
||||
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
EXPECT_EQ(failCounter + 1, callCounter);
|
||||
EXPECT_EQ(failCounter, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(2u, *hostAddress);
|
||||
}
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
|
||||
@@ -5299,6 +5299,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenDebugFlagSetWhenCallingSy
|
||||
|
||||
// success
|
||||
{
|
||||
immCmdList->getInOrderExecInfo()->addCounterValue(1);
|
||||
|
||||
ultCsr->checkGpuHangDetectedCalled = 0;
|
||||
ultCsr->forceReturnGpuHang = false;
|
||||
forceFail = false;
|
||||
@@ -5306,9 +5308,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, InOrderCmdListTests, givenDebugFlagSetWhenCallingSy
|
||||
EXPECT_EQ(downloadedAlloc, hostAlloc);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, immCmdList->hostSynchronize(std::numeric_limits<uint64_t>::max(), false));
|
||||
|
||||
EXPECT_EQ(failCounter, callCounter);
|
||||
EXPECT_EQ(failCounter - 1, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(1u, *hostAddress);
|
||||
EXPECT_EQ(failCounter + 1, callCounter);
|
||||
EXPECT_EQ(failCounter, ultCsr->checkGpuHangDetectedCalled);
|
||||
EXPECT_EQ(2u, *hostAddress);
|
||||
}
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
@@ -4948,12 +4948,12 @@ HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventCounterBasedUsedCreatedO
|
||||
event2->eventPoolAllocation = nullptr;
|
||||
|
||||
auto inOrderExecInfo0 = NEO::InOrderExecInfo::create(device->getDeviceInOrderCounterAllocator()->getTag(), nullptr, *device->getNEODevice(), 1, false);
|
||||
inOrderExecInfo0->setLastWaitedCounterValue(1);
|
||||
inOrderExecInfo0->setLastWaitedCounterValue(1, 0);
|
||||
event0->updateInOrderExecState(inOrderExecInfo0, 1, 0);
|
||||
|
||||
uint64_t counter = 2;
|
||||
auto inOrderExecInfo1 = NEO::InOrderExecInfo::createFromExternalAllocation(*device->getNEODevice(), nullptr, 0x1, nullptr, &counter, 1, 1, 1);
|
||||
inOrderExecInfo1->setLastWaitedCounterValue(1);
|
||||
inOrderExecInfo1->setLastWaitedCounterValue(1, 0);
|
||||
event1->updateInOrderExecState(inOrderExecInfo1, 1, 0);
|
||||
|
||||
MockGraphicsAllocation mockAlloc(rootDeviceIndex, nullptr, 1);
|
||||
|
||||
@@ -268,6 +268,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1:
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled for immediate in-order cmd lists, 2: Enabled for all types. If enabled, all compute cmdlist will try to offload copy operations to copy engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased")
|
||||
DECLARE_DEBUG_VARIABLE(int64_t, InitialCounterBasedEventValue, -1, "-1: default, >=0: initial value set during counter creation")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugSetMemoryDiagnosticsDelay, -1, "-1: default, >=0: delay time in minutes necessary for completion of Memory diagnostics")
|
||||
|
||||
@@ -73,7 +73,9 @@ InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *ho
|
||||
deviceAddress = deviceCounterNode->getGpuAddress();
|
||||
}
|
||||
|
||||
isTbx = device.getDefaultEngine().commandStreamReceiver->isTbxMode();
|
||||
auto csr = device.getDefaultEngine().commandStreamReceiver;
|
||||
isTbx = csr->isTbxMode();
|
||||
immWritePostSyncWriteOffset = std::max(csr->getImmWritePostSyncWriteOffset(), static_cast<uint32_t>(sizeof(uint64_t)));
|
||||
|
||||
reset();
|
||||
}
|
||||
@@ -98,20 +100,28 @@ void InOrderExecInfo::uploadToTbx(TagNodeBase &node, size_t size) {
|
||||
}
|
||||
|
||||
void InOrderExecInfo::initializeAllocationsFromHost() {
|
||||
const uint64_t initialValue = getInitialCounterValue();
|
||||
|
||||
if (deviceCounterNode) {
|
||||
const size_t deviceAllocationWriteSize = sizeof(uint64_t) * numDevicePartitionsToWait;
|
||||
memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize);
|
||||
for (uint32_t i = 0; i < numDevicePartitionsToWait; i++) {
|
||||
uint64_t *ptr = reinterpret_cast<uint64_t *>(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset + (i * immWritePostSyncWriteOffset)));
|
||||
*ptr = initialValue;
|
||||
}
|
||||
|
||||
if (isTbx) {
|
||||
const size_t deviceAllocationWriteSize = alignUp(sizeof(uint64_t), immWritePostSyncWriteOffset) * numDevicePartitionsToWait;
|
||||
uploadToTbx(*deviceCounterNode, deviceAllocationWriteSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (hostCounterNode) {
|
||||
const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait;
|
||||
memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize);
|
||||
for (uint32_t i = 0; i < numHostPartitionsToWait; i++) {
|
||||
uint64_t *ptr = reinterpret_cast<uint64_t *>(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset + (i * immWritePostSyncWriteOffset)));
|
||||
*ptr = initialValue;
|
||||
}
|
||||
|
||||
if (isTbx) {
|
||||
const size_t hostAllocationWriteSize = alignUp(sizeof(uint64_t), immWritePostSyncWriteOffset) * numHostPartitionsToWait;
|
||||
uploadToTbx(*hostCounterNode, hostAllocationWriteSize);
|
||||
}
|
||||
}
|
||||
@@ -125,6 +135,11 @@ void InOrderExecInfo::reset() {
|
||||
initializeAllocationsFromHost();
|
||||
}
|
||||
|
||||
// Resets counterValue to getInitialCounterValue() and overwrites one
// lastWaitedCounterValue slot with the same initial value.
// The slot is chosen by the CURRENT allocationOffset member: index 0 when the
// offset is 0, index 1 otherwise. The other slot is left untouched.
void InOrderExecInfo::resetCounterValue() {
|
||||
counterValue = getInitialCounterValue();
|
||||
// store() replaces the slot unconditionally, so the value can move backwards here.
lastWaitedCounterValue[allocationOffset != 0].store(getInitialCounterValue());
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *InOrderExecInfo::getDeviceCounterAllocation() const {
|
||||
if (externalDeviceAllocation) {
|
||||
return externalDeviceAllocation;
|
||||
@@ -143,19 +158,20 @@ uint64_t InOrderExecInfo::getBaseHostGpuAddress() const {
|
||||
return hostCounterNode->getGpuAddress();
|
||||
}
|
||||
|
||||
// Appends a timestamp node to tempTimestampNodes while holding `mutex`.
// NOTE(review): this span is a rendered diff without +/- markers. The
// two-parameter signature and the emplace_back(node, value) line are
// presumably the removed lines; the three-parameter signature and the
// emplace_back storing std::make_pair(value, allocationOffset) are presumably
// their replacements -- confirm against the repository.
void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value) {
|
||||
void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value, uint32_t allocationOffset) {
|
||||
// Lock is held for the whole function body.
std::unique_lock<std::mutex> lock(mutex);
|
||||
|
||||
tempTimestampNodes.emplace_back(node, value);
|
||||
tempTimestampNodes.emplace_back(node, std::make_pair(value, allocationOffset));
|
||||
}
|
||||
|
||||
void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) {
|
||||
std::unique_lock<std::mutex> lock(mutex);
|
||||
|
||||
std::vector<std::pair<TagNodeBase *, uint64_t>> tempVector;
|
||||
std::vector<std::pair<TagNodeBase *, CounterAndOffsetPairT>> tempVector;
|
||||
|
||||
for (auto &node : tempTimestampNodes) {
|
||||
if (forceReturn || lastWaitedCounterValue >= node.second) {
|
||||
const auto &counterAndOffsetPair = node.second;
|
||||
if (forceReturn || isCounterAlreadyDone(counterAndOffsetPair.first, counterAndOffsetPair.second)) {
|
||||
node.first->returnTag();
|
||||
} else {
|
||||
tempVector.push_back(node);
|
||||
@@ -179,4 +195,8 @@ uint64_t InOrderExecInfo::getDeviceNodeGpuAddress() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Returns the initial counter value, read from the InitialCounterBasedEventValue
// debug flag through getIfNotDefault<uint64_t>(0).
// NOTE(review): presumably yields the flag value when it was explicitly set and
// the fallback 0 when the flag is at its default -- confirm against the
// getIfNotDefault implementation, which is not visible here.
uint64_t InOrderExecInfo::getInitialCounterValue() const {
|
||||
return debugManager.flags.InitialCounterBasedEventValue.getIfNotDefault<uint64_t>(0);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/helpers/common_types.h"
|
||||
#include "shared/source/helpers/mt_helpers.h"
|
||||
#include "shared/source/helpers/non_copyable_or_moveable.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/source/memory_manager/allocation_type.h"
|
||||
@@ -82,7 +83,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
uint64_t getCounterValue() const { return counterValue; }
|
||||
void addCounterValue(uint64_t addValue) { counterValue += addValue; }
|
||||
void resetCounterValue() { counterValue = 0; }
|
||||
void resetCounterValue();
|
||||
|
||||
uint64_t getRegularCmdListSubmissionCounter() const { return regularCmdListSubmissionCounter; }
|
||||
void addRegularCmdListSubmissionCounter(uint64_t addValue) { regularCmdListSubmissionCounter += addValue; }
|
||||
@@ -100,23 +101,27 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
|
||||
void reset();
|
||||
bool isExternalMemoryExecInfo() const { return deviceCounterNode == nullptr; }
|
||||
// Records a counter value that has been waited on. Nothing is recorded when
// isExternalMemoryExecInfo() returns true.
// NOTE(review): this span is a rendered diff without +/- markers. The
// one-parameter signature and the std::max assignment are presumably the
// removed lines; the two-parameter signature and the interlockedMax call on
// lastWaitedCounterValue[allocationOffset != 0] are presumably their
// replacements -- confirm against the repository.
void setLastWaitedCounterValue(uint64_t value) {
|
||||
void setLastWaitedCounterValue(uint64_t value, uint32_t allocationOffset) {
|
||||
if (!isExternalMemoryExecInfo()) {
|
||||
lastWaitedCounterValue = std::max(value, lastWaitedCounterValue);
|
||||
// Slot index is 0 for allocationOffset == 0 and 1 for any non-zero offset.
NEO::MultiThreadHelpers::interlockedMax(lastWaitedCounterValue[allocationOffset != 0], value);
|
||||
}
|
||||
}
|
||||
|
||||
// Tells whether waitValue is already covered by a previously recorded
// last-waited counter value.
// NOTE(review): this span is a rendered diff without +/- markers (two
// signatures, one closing brace). The one-parameter form, which also requires
// this->allocationOffset == 0u, is presumably the removed version; the
// two-parameter form, which compares against the slot selected by the
// allocationOffset argument (0 -> index 0, non-zero -> index 1), is presumably
// its replacement -- confirm against the repository.
bool isCounterAlreadyDone(uint64_t waitValue) const {
|
||||
return lastWaitedCounterValue >= waitValue && this->allocationOffset == 0u;
|
||||
bool isCounterAlreadyDone(uint64_t waitValue, uint32_t allocationOffset) const {
|
||||
return lastWaitedCounterValue[allocationOffset != 0] >= waitValue;
|
||||
}
|
||||
|
||||
NEO::GraphicsAllocation *getExternalHostAllocation() const { return externalHostAllocation; }
|
||||
NEO::GraphicsAllocation *getExternalDeviceAllocation() const { return externalDeviceAllocation; }
|
||||
|
||||
void pushTempTimestampNode(TagNodeBase *node, uint64_t value);
|
||||
void pushTempTimestampNode(TagNodeBase *node, uint64_t value, uint32_t allocationOffset);
|
||||
void releaseNotUsedTempTimestampNodes(bool forceReturn);
|
||||
|
||||
uint64_t getInitialCounterValue() const;
|
||||
|
||||
protected:
|
||||
using CounterAndOffsetPairT = std::pair<uint64_t, uint32_t>;
|
||||
|
||||
void uploadToTbx(TagNodeBase &node, size_t size);
|
||||
|
||||
NEO::Device &device;
|
||||
@@ -124,12 +129,12 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
NEO::TagNodeBase *hostCounterNode = nullptr;
|
||||
NEO::GraphicsAllocation *externalHostAllocation = nullptr;
|
||||
NEO::GraphicsAllocation *externalDeviceAllocation = nullptr;
|
||||
std::vector<std::pair<NEO::TagNodeBase *, uint64_t>> tempTimestampNodes;
|
||||
std::vector<std::pair<NEO::TagNodeBase *, CounterAndOffsetPairT>> tempTimestampNodes;
|
||||
|
||||
std::mutex mutex;
|
||||
std::atomic<uint64_t> lastWaitedCounterValue[2] = {0, 0}; // [0] for offset == 0, [1] for offset != 0
|
||||
|
||||
uint64_t counterValue = 0;
|
||||
uint64_t lastWaitedCounterValue = 0;
|
||||
uint64_t regularCmdListSubmissionCounter = 0;
|
||||
uint64_t deviceAddress = 0;
|
||||
uint64_t *hostAddress = nullptr;
|
||||
@@ -137,6 +142,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
|
||||
uint32_t numHostPartitionsToWait = 0;
|
||||
uint32_t allocationOffset = 0;
|
||||
uint32_t rootDeviceIndex = 0;
|
||||
uint32_t immWritePostSyncWriteOffset = 0;
|
||||
bool regularCmdList = false;
|
||||
bool duplicatedHostStorage = false;
|
||||
bool atomicDeviceSignalling = false;
|
||||
|
||||
@@ -673,4 +673,5 @@ LimitIsaPrefetchSize = -1
|
||||
EnableUsmAllocationPoolManager = -1
|
||||
ForceTotalWMTPDataSize = -1
|
||||
DirectSubmissionInitialSemaphoreValue = -1
|
||||
InitialCounterBasedEventValue = -1
|
||||
# Please don't edit below this line
|
||||
|
||||
@@ -120,10 +120,10 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
|
||||
{
|
||||
MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, mockDevice, 1, false, false);
|
||||
|
||||
inOrderExecInfo.lastWaitedCounterValue = 0;
|
||||
inOrderExecInfo.lastWaitedCounterValue[0] = 0;
|
||||
|
||||
inOrderExecInfo.pushTempTimestampNode(node0, 1);
|
||||
inOrderExecInfo.pushTempTimestampNode(node1, 2);
|
||||
inOrderExecInfo.pushTempTimestampNode(node0, 1, 0);
|
||||
inOrderExecInfo.pushTempTimestampNode(node1, 2, 0);
|
||||
|
||||
EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size());
|
||||
|
||||
@@ -133,7 +133,7 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
|
||||
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
|
||||
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
|
||||
|
||||
inOrderExecInfo.lastWaitedCounterValue = 1;
|
||||
inOrderExecInfo.lastWaitedCounterValue[0] = 1;
|
||||
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
|
||||
EXPECT_EQ(1u, inOrderExecInfo.tempTimestampNodes.size());
|
||||
EXPECT_EQ(node1, inOrderExecInfo.tempTimestampNodes[0].first);
|
||||
@@ -141,7 +141,7 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
|
||||
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
|
||||
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
|
||||
|
||||
inOrderExecInfo.lastWaitedCounterValue = 2;
|
||||
inOrderExecInfo.lastWaitedCounterValue[0] = 2;
|
||||
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
|
||||
EXPECT_EQ(0u, inOrderExecInfo.tempTimestampNodes.size());
|
||||
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
|
||||
@@ -153,8 +153,8 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
|
||||
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
|
||||
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
|
||||
|
||||
inOrderExecInfo.pushTempTimestampNode(node0, 3);
|
||||
inOrderExecInfo.pushTempTimestampNode(node1, 4);
|
||||
inOrderExecInfo.pushTempTimestampNode(node0, 3, 0);
|
||||
inOrderExecInfo.pushTempTimestampNode(node1, 4, 0);
|
||||
}
|
||||
|
||||
// forced release on destruction
|
||||
@@ -162,6 +162,64 @@ HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnersh
|
||||
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1));
|
||||
}
|
||||
|
||||
// Checks that, with the InitialCounterBasedEventValue debug flag set, every
// per-partition device and host counter slot holds that initial value both
// right after InOrderExecInfo::create() and again after reset(), and that a
// value above the initial one is not reported as already done after reset().
HWTEST_F(CommandEncoderTests, givenDebugFlagSetWhenHandlingTheCounterThenUseInitialValue) {
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
constexpr uint64_t initialValue = 16;
|
||||
|
||||
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
|
||||
debugManager.flags.InOrderAtomicSignallingEnabled.set(0);
|
||||
debugManager.flags.InitialCounterBasedEventValue.set(static_cast<int64_t>(initialValue));
|
||||
|
||||
constexpr uint32_t partitionCount = 2u;
|
||||
MockDevice mockDevice;
|
||||
|
||||
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
|
||||
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, mockDevice.getMemoryManager());
|
||||
|
||||
// Distance used below to step from one partition's counter slot to the next.
const auto immWritePartitionOffset = ImplicitScalingDispatch<FamilyType>::getImmediateWritePostSyncOffset();
|
||||
|
||||
// initialize
|
||||
auto deviceNode = deviceTagAllocator.getTag();
|
||||
auto hostNode = hostTagAllocator.getTag();
|
||||
|
||||
auto devicePtrBase = reinterpret_cast<uint64_t *>(deviceNode->getCpuBase());
|
||||
auto hostPtrBase = reinterpret_cast<uint64_t *>(hostNode->getCpuBase());
|
||||
|
||||
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, partitionCount, false);
|
||||
|
||||
// Right after creation each partition slot must already hold initialValue.
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
|
||||
EXPECT_EQ(initialValue, *devicePtr);
|
||||
|
||||
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
|
||||
EXPECT_EQ(initialValue, *hostPtr);
|
||||
}
|
||||
|
||||
// update
// Overwrite every slot with a different value so the reset below is observable.
|
||||
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
|
||||
*devicePtr = initialValue + 10;
|
||||
|
||||
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
|
||||
*hostPtr = initialValue + 20;
|
||||
}
|
||||
|
||||
// Record a last-waited value above initialValue for allocation offset 0.
inOrderExecInfo->setLastWaitedCounterValue(initialValue + 5, 0);
|
||||
|
||||
// reset
|
||||
inOrderExecInfo->reset();
|
||||
|
||||
for (uint32_t i = 0; i < partitionCount; i++) {
|
||||
auto devicePtr = ptrOffset(devicePtrBase, i * immWritePartitionOffset);
|
||||
EXPECT_EQ(initialValue, *devicePtr);
|
||||
|
||||
auto hostPtr = ptrOffset(hostPtrBase, i * immWritePartitionOffset);
|
||||
EXPECT_EQ(initialValue, *hostPtr);
|
||||
}
|
||||
// The last-waited value recorded before reset() must no longer satisfy this query.
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(initialValue + 1, 0));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) {
|
||||
MockDevice mockDevice;
|
||||
|
||||
@@ -296,29 +354,29 @@ HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenSetLastCounterValueIs
|
||||
auto node = tagAllocator.getTag();
|
||||
|
||||
auto inOrderExecInfo = std::make_unique<InOrderExecInfo>(node, nullptr, mockDevice, 2, true, false);
|
||||
inOrderExecInfo->setLastWaitedCounterValue(1u);
|
||||
inOrderExecInfo->setLastWaitedCounterValue(1u, 0);
|
||||
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
|
||||
|
||||
inOrderExecInfo->setLastWaitedCounterValue(0u);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||
inOrderExecInfo->setLastWaitedCounterValue(0u, 0);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(1u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 0));
|
||||
|
||||
inOrderExecInfo->setLastWaitedCounterValue(3u);
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(3u));
|
||||
inOrderExecInfo->setLastWaitedCounterValue(3u, 0);
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(2u, 0));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(3u, 0));
|
||||
|
||||
inOrderExecInfo->setAllocationOffset(4u);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(3u));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(0u));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(2u, 4));
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(3u, 4));
|
||||
EXPECT_TRUE(inOrderExecInfo->isCounterAlreadyDone(0u, 4));
|
||||
|
||||
inOrderExecInfo = std::make_unique<InOrderExecInfo>(nullptr, nullptr, mockDevice, 2, true, false);
|
||||
inOrderExecInfo->setLastWaitedCounterValue(2);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(1));
|
||||
inOrderExecInfo->setLastWaitedCounterValue(2, 0);
|
||||
EXPECT_FALSE(inOrderExecInfo->isCounterAlreadyDone(1, 0));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncoderTests, givenInOrderExecutionInfoWhenResetCalledThenUploadToTbx) {
|
||||
|
||||
Reference in New Issue
Block a user