fix: don't download tbx allocations on heapless first device submission
Related-To: HSD-18039476929 Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
d52ca080bd
commit
24cfd203ab
|
@ -994,7 +994,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint6
|
|||
} else {
|
||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
|
||||
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds}, waitTaskCount);
|
||||
const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, false, timeoutInMicroSeconds}, waitTaskCount);
|
||||
if (waitStatus == NEO::WaitStatus::gpuHang) {
|
||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
} else if (waitStatus == NEO::WaitStatus::notReady) {
|
||||
|
|
|
@ -51,7 +51,7 @@ ze_result_t CommandListImp::destroy() {
|
|||
|
||||
if (isImmediateType() && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount());
|
||||
getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount());
|
||||
}
|
||||
|
||||
if (!isImmediateType() &&
|
||||
|
|
|
@ -177,7 +177,7 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeoutNa
|
|||
timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
}
|
||||
|
||||
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait);
|
||||
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait);
|
||||
if (waitStatus == NEO::WaitStatus::notReady) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -784,7 +784,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForT
|
|||
auto cmdBuffer = cmdBufferList.peekHead();
|
||||
EXPECT_EQ(1u, cmdBuffer->taskCount);
|
||||
|
||||
mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, 1);
|
||||
mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, 1);
|
||||
|
||||
EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount());
|
||||
|
||||
|
@ -2090,5 +2090,5 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutI
|
|||
mockCsr.latestSentTaskCount = 1;
|
||||
auto cmdBuffer = std::make_unique<CommandBuffer>(*pDevice);
|
||||
mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release());
|
||||
EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, 0}, 1));
|
||||
EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, 1));
|
||||
}
|
||||
|
|
|
@ -816,7 +816,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
|
|||
CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress;
|
||||
CpuIntrinsicsTests::pauseValue = taskCountToWait;
|
||||
|
||||
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, taskCountToWait);
|
||||
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, taskCountToWait);
|
||||
EXPECT_EQ(NEO::WaitStatus::ready, ret);
|
||||
}
|
||||
|
||||
|
@ -836,7 +836,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo
|
|||
CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress;
|
||||
CpuIntrinsicsTests::pauseValue = taskCountToWait;
|
||||
|
||||
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, 10}, taskCountToWait);
|
||||
const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, false, 10}, taskCountToWait);
|
||||
EXPECT_EQ(NEO::WaitStatus::notReady, ret);
|
||||
}
|
||||
|
||||
|
|
|
@ -215,7 +215,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCou
|
|||
auto address = getTagAddress();
|
||||
if (!skipResourceCleanup() && address) {
|
||||
this->downloadTagAllocation(requiredTaskCount);
|
||||
return baseWaitFunction(address, WaitParams{false, false, 0}, requiredTaskCount);
|
||||
return baseWaitFunction(address, WaitParams{false, false, false, 0}, requiredTaskCount);
|
||||
}
|
||||
|
||||
return WaitStatus::ready;
|
||||
|
|
|
@ -828,7 +828,7 @@ inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNot
|
|||
if (status == WaitStatus::notReady) {
|
||||
waitForFlushStamp(flushStampToWait);
|
||||
// now call blocking wait, this is to ensure that task count is reached
|
||||
status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait);
|
||||
status = waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, taskCountToWait);
|
||||
}
|
||||
|
||||
// If GPU hang occurred, then propagate it to the caller.
|
||||
|
|
|
@ -17,7 +17,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSu
|
|||
auto status = flushTagUpdate();
|
||||
|
||||
if (isTbxMode() && (status == SubmissionStatus::success)) {
|
||||
waitForTaskCountWithKmdNotifyFallback(this->taskCount, 0, false, QueueThrottle::MEDIUM);
|
||||
waitForCompletionWithTimeout({true, false, true, TimeoutControls::maxTimeout}, this->taskCount);
|
||||
}
|
||||
|
||||
return status;
|
||||
|
|
|
@ -30,7 +30,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
|
|||
|
||||
uint32_t getMaskAndValueForPollForCompletion() const;
|
||||
bool getpollNotEqualValueForPollForCompletion() const;
|
||||
void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount);
|
||||
void flushSubmissionsAndDownloadAllocations(TaskCountType taskCount, bool skipAllocationsDownload);
|
||||
|
||||
public:
|
||||
using CommandStreamReceiverSimulatedCommonHw<GfxFamily>::initAdditionalMMIO;
|
||||
|
|
|
@ -487,7 +487,7 @@ bool TbxCommandStreamReceiverHw<GfxFamily>::expectMemory(const void *gfxAddress,
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) {
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait, bool skipAllocationsDownload) {
|
||||
this->flushBatchedSubmissions();
|
||||
|
||||
if (this->latestFlushedTaskCount < taskCountToWait) {
|
||||
|
@ -502,6 +502,10 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
|
|||
pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset);
|
||||
}
|
||||
|
||||
if (skipAllocationsDownload) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto lockCSR = this->obtainUniqueOwnership();
|
||||
for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) {
|
||||
this->downloadAllocation(*graphicsAllocation);
|
||||
|
@ -511,13 +515,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
|
|||
|
||||
template <typename GfxFamily>
|
||||
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) {
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait);
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait, false);
|
||||
return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(const WaitParams &params, TaskCountType taskCountToWait) {
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait);
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait, params.skipTbxDownload);
|
||||
return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -18,8 +18,13 @@ enum class WaitStatus {
|
|||
};
|
||||
|
||||
// Bundle of options handed to the command stream receiver wait routines
// (e.g. waitForCompletionWithTimeout / baseWaitFunction).
//
// A default-constructed instance has every flag cleared and a zero timeout.
struct WaitParams {
    WaitParams() = default;

    // Positional order is: indefinitelyPoll, enableTimeout, skipTbxDownload, waitTimeout.
    // Note that the three leading bools are easy to transpose at call sites.
    WaitParams(bool indefinitelyPoll, bool enableTimeout, bool skipTbxDownload, int64_t waitTimeout)
        : indefinitelyPoll(indefinitelyPoll), enableTimeout(enableTimeout), skipTbxDownload(skipTbxDownload), waitTimeout(waitTimeout) {}

    // NOTE(review): presumably requests polling without an upper bound; callers set it
    // when the user timeout equals the maximum uint64_t value - confirm in the wait loop.
    bool indefinitelyPoll = false;

    // NOTE(review): presumably makes the wait honour waitTimeout - confirm in the wait loop.
    bool enableTimeout = false;

    // When true, the TBX command stream receiver still flushes pending submissions
    // but returns before downloading the allocations queued for download.
    bool skipTbxDownload = false;

    // Timeout value; call sites pass quantities named in microseconds.
    int64_t waitTimeout = 0;
};
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -24,7 +24,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest,
|
|||
bool kmdWaitModeActive,
|
||||
bool directSubmissionEnabled) {
|
||||
if (throttle == QueueThrottle::HIGH) {
|
||||
return WaitParams{true};
|
||||
return WaitParams{true, false, false, 0};
|
||||
}
|
||||
|
||||
if (flushStampToWait == 0) {
|
||||
|
@ -36,7 +36,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest,
|
|||
}
|
||||
|
||||
if (debugManager.flags.PowerSavingMode.get() || throttle == QueueThrottle::LOW) {
|
||||
return WaitParams{false, true, 1};
|
||||
return WaitParams{false, true, false, 1};
|
||||
}
|
||||
|
||||
const int64_t taskCountDiff = (currentHwTag < taskCountToWait) ? static_cast<int64_t>(taskCountToWait - currentHwTag) : 1;
|
||||
|
|
|
@ -902,7 +902,7 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat
|
|||
if (graphicsAllocation.isUsedByOsContext(osContextId) &&
|
||||
engine.commandStreamReceiver->getTagAllocation() != nullptr &&
|
||||
allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) {
|
||||
engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, TimeoutControls::maxTimeout}, allocationTaskCount);
|
||||
engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, TimeoutControls::maxTimeout}, allocationTaskCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -925,7 +925,7 @@ void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForComplet
|
|||
for (auto &engine : engineContainer) {
|
||||
auto csr = engine.commandStreamReceiver;
|
||||
if (waitForCompletion) {
|
||||
csr->waitForCompletionWithTimeout(WaitParams{false, false, 0}, csr->peekLatestSentTaskCount());
|
||||
csr->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, csr->peekLatestSentTaskCount());
|
||||
}
|
||||
csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
|
|
@ -78,7 +78,7 @@ DrmCommandStreamReceiver<GfxFamily>::DrmCommandStreamReceiver(ExecutionEnvironme
|
|||
template <typename GfxFamily>
|
||||
inline DrmCommandStreamReceiver<GfxFamily>::~DrmCommandStreamReceiver() {
|
||||
if (this->isUpdateTagFromWaitEnabled()) {
|
||||
this->waitForCompletionWithTimeout(WaitParams{false, false, 0}, this->peekTaskCount());
|
||||
this->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, this->peekTaskCount());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::evictUnusedAllocationsImp
|
|||
break;
|
||||
}
|
||||
if (waitForCompletion) {
|
||||
const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount());
|
||||
const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount());
|
||||
if (waitStatus == WaitStatus::gpuHang) {
|
||||
return MemoryOperationsStatus::gpuHangDetectedDuringOperation;
|
||||
}
|
||||
|
|
|
@ -304,7 +304,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
}
|
||||
|
||||
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) {
|
||||
return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait);
|
||||
return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait);
|
||||
}
|
||||
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override {
|
||||
|
@ -523,7 +523,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
|
||||
std::atomic<TaskCountType> latestWaitForCompletionWithTimeoutTaskCount{0};
|
||||
TaskCountType latestSentTaskCountValueDuringFlush = 0;
|
||||
WaitParams latestWaitForCompletionWithTimeoutWaitParams{0};
|
||||
WaitParams latestWaitForCompletionWithTimeoutWaitParams{};
|
||||
WaitUserFenceParams waitUserFenecParams;
|
||||
WriteMemoryParams writeMemoryParams;
|
||||
TaskCountType flushBcsTaskReturnValue{};
|
||||
|
|
|
@ -5349,15 +5349,16 @@ HWTEST_F(CommandStreamReceiverTest, givenTbxCsrWhenInitializingThenWaitForComple
|
|||
commandStreamReceiver.initializeTagAllocation();
|
||||
|
||||
EXPECT_EQ(0u, commandStreamReceiver.taskCount);
|
||||
EXPECT_EQ(0u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
EXPECT_EQ(0u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
||||
EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
|
||||
EXPECT_EQ(1u, commandStreamReceiver.taskCount);
|
||||
EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
|
||||
EXPECT_TRUE(commandStreamReceiver.latestWaitForCompletionWithTimeoutWaitParams.skipTbxDownload);
|
||||
|
||||
EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice));
|
||||
EXPECT_EQ(1u, commandStreamReceiver.taskCount);
|
||||
EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled);
|
||||
EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled);
|
||||
|
||||
MockCsrHw<FamilyType> failingCommandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
|
||||
failingCommandStreamReceiver.commandStreamReceiverType = CommandStreamReceiverType::tbx;
|
||||
|
|
|
@ -437,7 +437,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForCompletionWithT
|
|||
|
||||
tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3};
|
||||
|
||||
tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, 0}, 0);
|
||||
tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, false, 0}, 0);
|
||||
|
||||
std::set<GraphicsAllocation *> expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3};
|
||||
EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations);
|
||||
|
@ -456,7 +456,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenLatestFlushedTaskCountLowerThanTagWhenF
|
|||
EXPECT_FALSE(tbxCsr.flushTagCalled);
|
||||
|
||||
EXPECT_EQ(0u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
tbxCsr.flushSubmissionsAndDownloadAllocations(1u);
|
||||
tbxCsr.flushSubmissionsAndDownloadAllocations(1u, false);
|
||||
EXPECT_EQ(1u, tbxCsr.obtainUniqueOwnershipCalled);
|
||||
|
||||
EXPECT_TRUE(tbxCsr.flushTagCalled);
|
||||
|
|
Loading…
Reference in New Issue