fix: only increase fence/task count when task submission succeeds

Related-To: NEO-7812

Signed-off-by: Pan Zhenjie <zhenjie.pan@intel.com>
This commit is contained in:
Zhenjie Pan
2023-03-15 15:10:06 +00:00
committed by Compute-Runtime-Automation
parent d8f99161dd
commit 820a189c52
6 changed files with 104 additions and 10 deletions

View File

@@ -430,7 +430,9 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll
TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) {
this->flushTagUpdate();
if (this->flushTagUpdate() != NEO::SubmissionStatus::SUCCESS) {
return WaitStatus::NotReady;
}
}
volatile TagAddressType *partitionAddress = pollAddress;

View File

@@ -191,6 +191,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void handleFrontEndStateTransition(const DispatchFlags &dispatchFlags);
void handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags);
void handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty);
void updateStreamTaskCount(LinearStream &stream, TaskCountType newTaskCount);
bool bcsRelaxedOrderingAllowed(const BlitPropertiesContainer &blitPropertiesContainer, bool hasStallingCmds) const;

View File

@@ -241,11 +241,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &c
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchBcsFlags.hasStallingCmds),
dispatchBcsFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
updateStreamTaskCount(streamToSubmit, taskCount + 1);
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
if (submissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(streamToSubmit, taskCount);
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
return completionStamp;
}
@@ -723,13 +723,14 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchFlags.hasStallingCmds || hasStallingCmdsOnTaskStream),
dispatchFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
updateStreamTaskCount(streamToSubmit, taskCount + 1);
if (submitCSR || submitTask) {
if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
if (submissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(streamToSubmit, taskCount);
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
return completionStamp;
}
@@ -1329,11 +1330,11 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream, endingCmdPtr, this->getNumClients(), hasStallingCmds, isRelaxedOrderingDispatch};
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
updateStreamTaskCount(commandStream, newTaskCount);
auto flushSubmissionStatus = flush(batchBuffer, getResidencyAllocations());
if (flushSubmissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(commandStream, taskCount);
return CompletionStamp::getTaskCountFromSubmissionStatusError(flushSubmissionStatus);
}
makeSurfacePackNonResident(getResidencyAllocations(), true);
@@ -1447,7 +1448,9 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
this->latestSentTaskCount = taskCount + 1;
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
taskCount++;
if (submissionStatus == SubmissionStatus::SUCCESS) {
taskCount++;
}
return submissionStatus;
}
@@ -1743,4 +1746,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::handleStateBaseAddressStateTrans
}
}
// Stamps the graphics allocation backing `stream` with `newTaskCount`: both its task count
// and its residency task count are updated for this receiver's OS context id.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::updateStreamTaskCount(LinearStream &stream, TaskCountType newTaskCount) {
    auto allocation = stream.getGraphicsAllocation();
    const auto contextId = this->osContext->getContextId();

    allocation->updateTaskCount(newTaskCount, contextId);
    allocation->updateResidencyTaskCount(newTaskCount, contextId);
}
} // namespace NEO

View File

@@ -126,7 +126,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
TaskCountType completionValue = 0u;
uint64_t completionFenceGpuAddress = 0u;
if (this->isCompletionFenceSupported()) {
completionValue = ++completionFenceValue;
completionValue = completionFenceValue + 1;
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
}
@@ -155,6 +155,10 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
}
}
if (this->isCompletionFenceSupported() && ret) {
completionFenceValue++;
}
return ret;
}

View File

@@ -392,7 +392,7 @@ HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndNoGpuHangWhen
EXPECT_EQ(WaitStatus::NotReady, waitStatus);
}
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpuHangIsReturned) {
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaitingForTaskCountThenGpuHangIsReturned) {
auto driverModelMock = std::make_unique<MockDriverModel>();
driverModelMock->isGpuHangDetectedToReturn = true;
@@ -413,6 +413,32 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpu
EXPECT_TRUE(csr.downloadAllocationCalled);
}
// For each failing flush return value listed below, waitForTaskCount is expected to
// report WaitStatus::NotReady.
HWTEST_F(CommandStreamReceiverTest, givenFlushUnsuccessWhenWaitingForTaskCountThenNotReadyIsReturned) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.activePartitions = 1;

    constexpr auto taskCountToWait = 1;

    // Flush results that are checked, in the same order as they are exercised.
    const SubmissionStatus unsuccessfulStatuses[] = {
        SubmissionStatus::FAILED,
        SubmissionStatus::OUT_OF_MEMORY,
        SubmissionStatus::OUT_OF_HOST_MEMORY,
        SubmissionStatus::UNSUPPORTED,
        SubmissionStatus::DEVICE_UNINITIALIZED};

    for (const auto flushStatus : unsuccessfulStatuses) {
        csr.flushReturnValue = flushStatus;
        const auto reportedStatus = csr.waitForTaskCount(taskCountToWait);
        EXPECT_EQ(WaitStatus::NotReady, reportedStatus);
    }
}
HWTEST_F(CommandStreamReceiverTest, whenDownloadTagAllocationThenDonwloadOnlyIfTagAllocationWasFlushed) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.activePartitions = 1;
@@ -2709,6 +2735,32 @@ HWTEST_F(CommandStreamReceiverHwTest, givenFailedFailureOnFlushWhenFlushingTaskT
EXPECT_EQ(CompletionStamp::failed, completionStamp.taskCount);
}
// For each failing flush return value listed below, flushSmallTask is expected to leave
// the receiver's taskCount at its initial value of 1.
HWTEST_F(CommandStreamReceiverHwTest, givenUnsuccessOnFlushWhenFlushingSmallTaskThenTaskCountIsNotIncreased) {
    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &stream = commandStreamReceiver.getCS(4096u);

    commandStreamReceiver.taskCount = 1u;

    // Flush results that are checked, in the same order as they are exercised.
    const SubmissionStatus unsuccessfulStatuses[] = {
        SubmissionStatus::FAILED,
        SubmissionStatus::OUT_OF_MEMORY,
        SubmissionStatus::OUT_OF_HOST_MEMORY,
        SubmissionStatus::UNSUPPORTED,
        SubmissionStatus::DEVICE_UNINITIALIZED};

    for (const auto flushStatus : unsuccessfulStatuses) {
        commandStreamReceiver.flushReturnValue = flushStatus;
        commandStreamReceiver.flushSmallTask(stream, stream.getUsed());
        EXPECT_EQ(1u, commandStreamReceiver.taskCount);
    }
}
HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingMiDWThenErrorIsPropagated) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@@ -399,6 +399,32 @@ HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportAndExecFailureWhe
ringBuffer->getBufferObjectToModify(0) = initialBO;
}
// With the DRM completion fence enabled through the debug flag, a submit() that returns
// false must leave completionFenceValue unchanged.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndExecFailureWhenSubmittingThenCompletionFenceValueIsNotIncreased) {
    uint64_t submitAddress = 0x1000;
    size_t submitSize = 0x1000;

    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(csr);
    directSubmission.completionFenceAllocation = csr.getTagAllocation();
    EXPECT_TRUE(directSubmission.allocateResources());

    // Swap the current ring buffer's first buffer object for a mock; the original is put back at the end.
    auto ringAllocation = static_cast<DrmAllocation *>(directSubmission.ringBuffers[directSubmission.currentRingBuffer].ringBuffer);
    auto originalBO = ringAllocation->getBufferObjectToModify(0);
    MockBufferObject failingBO(drm);
    ringAllocation->getBufferObjectToModify(0) = &failingBO;

    // Non-zero exec return value on the mock; the submit() below is expected to report failure.
    failingBO.execReturnValue = 1;
    directSubmission.completionFenceValue = 1u;

    EXPECT_FALSE(directSubmission.submit(submitAddress, submitSize));
    EXPECT_EQ(1u, directSubmission.completionFenceValue);

    ringAllocation->getBufferObjectToModify(0) = originalBO;
}
HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
uint64_t gpuAddress = 0x1000;
size_t size = 0x1000;