fix: don't download TBX allocations on heapless first device submission

Related-To: HSD-18039476929

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2024-08-06 12:54:11 +00:00 committed by Compute-Runtime-Automation
parent d52ca080bd
commit 24cfd203ab
18 changed files with 40 additions and 30 deletions

View File

@ -994,7 +994,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
} else {
const int64_t timeoutInMicroSeconds = timeout / 1000;
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds}, waitTaskCount);
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, false, timeoutInMicroSeconds}, waitTaskCount);
if (waitStatus == NEO::WaitStatus::gpuHang) {
status = ZE_RESULT_ERROR_DEVICE_LOST;
} else if (waitStatus == NEO::WaitStatus::notReady) {

View File

@ -51,7 +51,7 @@ ze_result_t CommandListImp::destroy() {
if (isImmediateType() && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount());
getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount());
}
if (!isImmediateType() &&

View File

@ -177,7 +177,7 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeoutNa
timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
}
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait);
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait);
if (waitStatus == NEO::WaitStatus::notReady) {
return ZE_RESULT_NOT_READY;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -784,7 +784,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForT
auto cmdBuffer = cmdBufferList.peekHead();
EXPECT_EQ(1u, cmdBuffer->taskCount);
mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, 1);
mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, 1);
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
@ -2090,5 +2090,5 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutI
mockCsr.latestSentTaskCount = 1;
auto cmdBuffer = std::make_unique<CommandBuffer>(*pDevice);
mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release());
EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, 0}, 1));
EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, 1));
}

View File

@ -816,7 +816,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress;
CpuIntrinsicsTests::pauseValue = taskCountToWait;
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, taskCountToWait);
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, taskCountToWait);
EXPECT_EQ(NEO::WaitStatus::ready, ret);
}
@ -836,7 +836,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress;
CpuIntrinsicsTests::pauseValue = taskCountToWait;
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, 10}, taskCountToWait);
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, false, 10}, taskCountToWait);
EXPECT_EQ(NEO::WaitStatus::notReady, ret);
}

View File

@ -215,7 +215,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCou
auto address = getTagAddress();
if (!skipResourceCleanup() && address) {
this->downloadTagAllocation(requiredTaskCount);
return baseWaitFunction(address, WaitParams{false, false, 0}, requiredTaskCount);
return baseWaitFunction(address, WaitParams{false, false, false, 0}, requiredTaskCount);
}
return WaitStatus::ready;

View File

@ -828,7 +828,7 @@ inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNot
if (status == WaitStatus::notReady) {
waitForFlushStamp(flushStampToWait);
// now call blocking wait, this is to ensure that task count is reached
status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait);
status = waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, taskCountToWait);
}
// If a GPU hang occurred, then propagate it to the caller.

View File

@ -17,7 +17,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSu
auto status = flushTagUpdate();
if (isTbxMode() && (status == SubmissionStatus::success)) {
waitForTaskCountWithKmdNotifyFallback(this->taskCount, 0, false, QueueThrottle::MEDIUM);
waitForCompletionWithTimeout({true, false, true, TimeoutControls::maxTimeout}, this->taskCount);
}
return status;

View File

@ -30,7 +30,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
uint32_t getMaskAndValueForPollForCompletion() const;
bool getpollNotEqualValueForPollForCompletion() const;
void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount);
void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount, bool skipAllocationsDownload);
public:
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;

View File

@ -487,7 +487,7 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress,
}
template <typename GfxFamily>
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) {
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait, bool skipAllocationsDownload) {
this->flushBatchedSubmissions();
if (this->latestFlushedTaskCount < taskCountToWait) {
@ -502,6 +502,10 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset);
}
if (skipAllocationsDownload) {
return;
}
auto lockCSR = this->obtainUniqueOwnership();
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
this->downloadAllocation(*graphicsAllocation);
@ -511,13 +515,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
flushSubmissionsAndDownloadAllocations(taskCountToWait, false);
return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
}
template <typename GfxFamily>
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
flushSubmissionsAndDownloadAllocations(taskCountToWait);
flushSubmissionsAndDownloadAllocations(taskCountToWait, params.skipTbxDownload);
return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait);
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -18,8 +18,13 @@ enum class WaitStatus {
};
// Bundle of options handed to the command stream receiver wait routines
// (e.g. waitForCompletionWithTimeout).
struct WaitParams {
    // Leaves every member at its in-class default: all flags false, timeout 0.
    WaitParams() = default;

    // Sets all four members explicitly; the argument order mirrors the
    // member declaration order below.
    WaitParams(bool indefinitelyPoll, bool enableTimeout, bool skipTbxDownload, int64_t waitTimeout)
        : indefinitelyPoll{indefinitelyPoll},
          enableTimeout{enableTimeout},
          skipTbxDownload{skipTbxDownload},
          waitTimeout{waitTimeout} {}

    bool indefinitelyPoll = false;
    bool enableTimeout = false;
    // Forwarded by the TBX receiver's waitForCompletionWithTimeout to
    // flushSubmissionsAndDownloadAllocations, which returns before its
    // allocation download loop when this is true.
    bool skipTbxDownload = false;
    // NOTE(review): callers visible here pass microsecond values - confirm the unit.
    int64_t waitTimeout = 0;
};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -24,7 +24,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest,
bool kmdWaitModeActive,
bool directSubmissionEnabled) {
if (throttle == QueueThrottle::HIGH) {
return WaitParams{true};
return WaitParams{true, false, false, 0};
}
if (flushStampToWait == 0) {
@ -36,7 +36,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest,
}
if (debugManager.flags.PowerSavingMode.get() || throttle == QueueThrottle::LOW) {
return WaitParams{false, true, 1};
return WaitParams{false, true, false, 1};
}
const int64_t taskCountDiff = (currentHwTag < taskCountToWait) ? static_cast<int64_t>(taskCountToWait - currentHwTag) : 1;

View File

@ -902,7 +902,7 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat
if (graphicsAllocation.isUsedByOsContext(osContextId) &&
engine.commandStreamReceiver->getTagAllocation() != nullptr &&
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, TimeoutControls::maxTimeout}, allocationTaskCount);
engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, TimeoutControls::maxTimeout}, allocationTaskCount);
}
}
}
@ -925,7 +925,7 @@ void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForComplet
for (auto &engine : engineContainer) {
auto csr = engine.commandStreamReceiver;
if (waitForCompletion) {
csr->waitForCompletionWithTimeout(WaitParams{false, false, 0}, csr->peekLatestSentTaskCount());
csr->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, csr->peekLatestSentTaskCount());
}
csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION);
}

View File

@ -78,7 +78,7 @@ DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(ExecutionEnvironme
template <typename GfxFamily>
inline DrmCommandStreamReceiver<GfxFamily>::~DrmCommandStreamReceiver() {
if (this->isUpdateTagFromWaitEnabled()) {
this->waitForCompletionWithTimeout(WaitParams{false, false, 0}, this->peekTaskCount());
this->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, this->peekTaskCount());
}
}

View File

@ -198,7 +198,7 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::evictUnusedAllocationsImp
break;
}
if (waitForCompletion) {
const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount());
const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount());
if (waitStatus == WaitStatus::gpuHang) {
return MemoryOperationsStatus::gpuHangDetectedDuringOperation;
}

View File

@ -304,7 +304,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
}
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) {
return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait);
return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait);
}
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
@ -523,7 +523,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
std::atomic<TaskCountType> latestWaitForCompletionWithTimeoutTaskCount{0};
TaskCountType latestSentTaskCountValueDuringFlush = 0;
WaitParams latestWaitForCompletionWithTimeoutWaitParams{0};
WaitParams latestWaitForCompletionWithTimeoutWaitParams{};
WaitUserFenceParams waitUserFenecParams;
WriteMemoryParams writeMemoryParams;
TaskCountType flushBcsTaskReturnValue{};

View File

@ -5349,15 +5349,16 @@ HWTEST_F(CommandStreamReceiverTest, givenTbxCsrWhenInitializingThenWaitForComple
commandStreamReceiver.initializeTagAllocation();
EXPECT_EQ(0u, commandStreamReceiver.taskCount);
EXPECT_EQ(0u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(0u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
EXPECT_EQ(1u, commandStreamReceiver.taskCount);
EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
EXPECT_TRUE(commandStreamReceiver.latestWaitForCompletionWithTimeoutWaitParams.skipTbxDownload);
EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
EXPECT_EQ(1u, commandStreamReceiver.taskCount);
EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
MockCsrHw<FamilyType> failingCommandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
failingCommandStreamReceiver.commandStreamReceiverType = CommandStreamReceiverType::tbx;

View File

@ -437,7 +437,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForCompletionWithT
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, 0}, 0);
tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, false, 0}, 0);
std::set<GraphicsAllocation *> expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3};
EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations);
@ -456,7 +456,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenLatestFlushedTaskCountLowerThanTagWhenF
EXPECT_FALSE(tbxCsr.flushTagCalled);
EXPECT_EQ(0u, tbxCsr.obtainUniqueOwnershipCalled);
tbxCsr.flushSubmissionsAndDownloadAllocations(1u);
tbxCsr.flushSubmissionsAndDownloadAllocations(1u, false);
EXPECT_EQ(1u, tbxCsr.obtainUniqueOwnershipCalled);
EXPECT_TRUE(tbxCsr.flushTagCalled);