fix: only increase fence/task count when task submission succeeds

Related-To: NEO-7812

Signed-off-by: Pan Zhenjie <zhenjie.pan@intel.com>
This commit is contained in:
Zhenjie Pan
2023-03-15 15:10:06 +00:00
committed by Compute-Runtime-Automation
parent d8f99161dd
commit 820a189c52
6 changed files with 104 additions and 10 deletions

View File

@@ -430,7 +430,9 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll
TaskCountType latestSentTaskCount = this->latestFlushedTaskCount; TaskCountType latestSentTaskCount = this->latestFlushedTaskCount;
if (latestSentTaskCount < taskCountToWait) { if (latestSentTaskCount < taskCountToWait) {
this->flushTagUpdate(); if (this->flushTagUpdate() != NEO::SubmissionStatus::SUCCESS) {
return WaitStatus::NotReady;
}
} }
volatile TagAddressType *partitionAddress = pollAddress; volatile TagAddressType *partitionAddress = pollAddress;

View File

@@ -191,6 +191,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void handleFrontEndStateTransition(const DispatchFlags &dispatchFlags); void handleFrontEndStateTransition(const DispatchFlags &dispatchFlags);
void handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags); void handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags);
void handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty); void handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty);
void updateStreamTaskCount(LinearStream &stream, TaskCountType newTaskCount);
bool bcsRelaxedOrderingAllowed(const BlitPropertiesContainer &blitPropertiesContainer, bool hasStallingCmds) const; bool bcsRelaxedOrderingAllowed(const BlitPropertiesContainer &blitPropertiesContainer, bool hasStallingCmds) const;

View File

@@ -241,11 +241,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushBcsTask(LinearStream &c
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchBcsFlags.hasStallingCmds), streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchBcsFlags.hasStallingCmds),
dispatchBcsFlags.hasRelaxedOrderingDependencies}; dispatchBcsFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId()); updateStreamTaskCount(streamToSubmit, taskCount + 1);
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations()); auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
if (submissionStatus != SubmissionStatus::SUCCESS) { if (submissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(streamToSubmit, taskCount);
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)}; CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
return completionStamp; return completionStamp;
} }
@@ -723,13 +723,14 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount, dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchFlags.hasStallingCmds || hasStallingCmdsOnTaskStream), streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, this->getNumClients(), (submitCSR || dispatchFlags.hasStallingCmds || hasStallingCmdsOnTaskStream),
dispatchFlags.hasRelaxedOrderingDependencies}; dispatchFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId()); updateStreamTaskCount(streamToSubmit, taskCount + 1);
if (submitCSR || submitTask) { if (submitCSR || submitTask) {
if (this->dispatchMode == DispatchMode::ImmediateDispatch) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations()); auto submissionStatus = flushHandler(batchBuffer, this->getResidencyAllocations());
if (submissionStatus != SubmissionStatus::SUCCESS) { if (submissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(streamToSubmit, taskCount);
CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)}; CompletionStamp completionStamp = {CompletionStamp::getTaskCountFromSubmissionStatusError(submissionStatus)};
return completionStamp; return completionStamp;
} }
@@ -1329,11 +1330,11 @@ TaskCountType CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropert
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream, endingCmdPtr, this->getNumClients(), hasStallingCmds, isRelaxedOrderingDispatch}; commandStream.getUsed(), &commandStream, endingCmdPtr, this->getNumClients(), hasStallingCmds, isRelaxedOrderingDispatch};
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId()); updateStreamTaskCount(commandStream, newTaskCount);
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
auto flushSubmissionStatus = flush(batchBuffer, getResidencyAllocations()); auto flushSubmissionStatus = flush(batchBuffer, getResidencyAllocations());
if (flushSubmissionStatus != SubmissionStatus::SUCCESS) { if (flushSubmissionStatus != SubmissionStatus::SUCCESS) {
updateStreamTaskCount(commandStream, taskCount);
return CompletionStamp::getTaskCountFromSubmissionStatusError(flushSubmissionStatus); return CompletionStamp::getTaskCountFromSubmissionStatusError(flushSubmissionStatus);
} }
makeSurfacePackNonResident(getResidencyAllocations(), true); makeSurfacePackNonResident(getResidencyAllocations(), true);
@@ -1447,7 +1448,9 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
this->latestSentTaskCount = taskCount + 1; this->latestSentTaskCount = taskCount + 1;
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations()); auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
taskCount++; if (submissionStatus == SubmissionStatus::SUCCESS) {
taskCount++;
}
return submissionStatus; return submissionStatus;
} }
@@ -1743,4 +1746,10 @@ inline void CommandStreamReceiverHw<GfxFamily>::handleStateBaseAddressStateTrans
} }
} }
// Writes `newTaskCount` as both the task count and the residency task count of the
// graphics allocation backing `stream`, keyed by this receiver's OS context id.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::updateStreamTaskCount(LinearStream &stream, TaskCountType newTaskCount) {
    auto streamAllocation = stream.getGraphicsAllocation();
    const auto contextId = this->osContext->getContextId();

    streamAllocation->updateTaskCount(newTaskCount, contextId);
    streamAllocation->updateResidencyTaskCount(newTaskCount, contextId);
}
} // namespace NEO } // namespace NEO

View File

@@ -126,7 +126,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
TaskCountType completionValue = 0u; TaskCountType completionValue = 0u;
uint64_t completionFenceGpuAddress = 0u; uint64_t completionFenceGpuAddress = 0u;
if (this->isCompletionFenceSupported()) { if (this->isCompletionFenceSupported()) {
completionValue = ++completionFenceValue; completionValue = completionFenceValue + 1;
completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + TagAllocationLayout::completionFenceOffset; completionFenceGpuAddress = this->completionFenceAllocation->getGpuAddress() + TagAllocationLayout::completionFenceOffset;
} }
@@ -155,6 +155,10 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
} }
} }
if (this->isCompletionFenceSupported() && ret) {
completionFenceValue++;
}
return ret; return ret;
} }

View File

@@ -392,7 +392,7 @@ HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndNoGpuHangWhen
EXPECT_EQ(WaitStatus::NotReady, waitStatus); EXPECT_EQ(WaitStatus::NotReady, waitStatus);
} }
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpuHangIsReturned) { HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaitingForTaskCountThenGpuHangIsReturned) {
auto driverModelMock = std::make_unique<MockDriverModel>(); auto driverModelMock = std::make_unique<MockDriverModel>();
driverModelMock->isGpuHangDetectedToReturn = true; driverModelMock->isGpuHangDetectedToReturn = true;
@@ -413,6 +413,32 @@ HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpu
EXPECT_TRUE(csr.downloadAllocationCalled); EXPECT_TRUE(csr.downloadAllocationCalled);
} }
// For every non-success status assigned to flushReturnValue, waitForTaskCount() is
// expected to report WaitStatus::NotReady.
HWTEST_F(CommandStreamReceiverTest, givenFlushUnsuccessWhenWaitingForTaskCountThenNotReadyIsReturned) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    csr.activePartitions = 1;

    constexpr auto taskCountToWait = 1;

    // Same statuses, in the same order, as the hand-unrolled checks they replace.
    const SubmissionStatus failingStatuses[] = {
        SubmissionStatus::FAILED,
        SubmissionStatus::OUT_OF_MEMORY,
        SubmissionStatus::OUT_OF_HOST_MEMORY,
        SubmissionStatus::UNSUPPORTED,
        SubmissionStatus::DEVICE_UNINITIALIZED,
    };

    for (const auto failingStatus : failingStatuses) {
        csr.flushReturnValue = failingStatus;
        const auto waitStatus = csr.waitForTaskCount(taskCountToWait);
        EXPECT_EQ(WaitStatus::NotReady, waitStatus);
    }
}
HWTEST_F(CommandStreamReceiverTest, whenDownloadTagAllocationThenDonwloadOnlyIfTagAllocationWasFlushed) { HWTEST_F(CommandStreamReceiverTest, whenDownloadTagAllocationThenDonwloadOnlyIfTagAllocationWasFlushed) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
csr.activePartitions = 1; csr.activePartitions = 1;
@@ -2709,6 +2735,32 @@ HWTEST_F(CommandStreamReceiverHwTest, givenFailedFailureOnFlushWhenFlushingTaskT
EXPECT_EQ(CompletionStamp::failed, completionStamp.taskCount); EXPECT_EQ(CompletionStamp::failed, completionStamp.taskCount);
} }
// For every non-success status assigned to flushReturnValue, flushSmallTask() is
// expected to leave taskCount at its initial value of 1.
HWTEST_F(CommandStreamReceiverHwTest, givenUnsuccessOnFlushWhenFlushingSmallTaskThenTaskCountIsNotIncreased) {
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &commandStream = csr.getCS(4096u);
    csr.taskCount = 1u;

    // Same statuses, in the same order, as the hand-unrolled checks they replace.
    const SubmissionStatus failingStatuses[] = {
        SubmissionStatus::FAILED,
        SubmissionStatus::OUT_OF_MEMORY,
        SubmissionStatus::OUT_OF_HOST_MEMORY,
        SubmissionStatus::UNSUPPORTED,
        SubmissionStatus::DEVICE_UNINITIALIZED,
    };

    for (const auto failingStatus : failingStatuses) {
        csr.flushReturnValue = failingStatus;
        // The returned status is intentionally not inspected; only taskCount is checked.
        csr.flushSmallTask(commandStream, commandStream.getUsed());
        EXPECT_EQ(1u, csr.taskCount);
    }
}
HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingMiDWThenErrorIsPropagated) { HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingMiDWThenErrorIsPropagated) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>(); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@@ -399,6 +399,32 @@ HWTEST_F(DrmDirectSubmissionTest, givenNoCompletionFenceSupportAndExecFailureWhe
ringBuffer->getBufferObjectToModify(0) = initialBO; ringBuffer->getBufferObjectToModify(0) = initialBO;
} }
// With EnableDrmCompletionFence set and the ring buffer's buffer object swapped for a
// mock whose execReturnValue is 1, submit() is expected to return false and
// completionFenceValue is expected to stay at the value it had before the call.
HWTEST_F(DrmDirectSubmissionTest, givenCompletionFenceSupportAndExecFailureWhenSubmittingThenCompletionFenceValueIsNotIncreased) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.EnableDrmCompletionFence.set(1);

    auto &csr = *device->getDefaultEngine().commandStreamReceiver;
    auto drm = executionEnvironment.rootDeviceEnvironments[0]->osInterface->getDriverModel()->as<Drm>();

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(csr);
    directSubmission.completionFenceAllocation = csr.getTagAllocation();
    EXPECT_TRUE(directSubmission.allocateResources());

    // Swap the current ring buffer's first buffer object for a mock; the original is put back at the end.
    auto ringAllocation = static_cast<DrmAllocation *>(directSubmission.ringBuffers[directSubmission.currentRingBuffer].ringBuffer);
    auto originalBo = ringAllocation->getBufferObjectToModify(0);

    MockBufferObject failingBo(drm);
    failingBo.execReturnValue = 1;
    ringAllocation->getBufferObjectToModify(0) = &failingBo;

    directSubmission.completionFenceValue = 1u;

    const uint64_t gpuAddress = 0x1000;
    const size_t size = 0x1000;
    EXPECT_FALSE(directSubmission.submit(gpuAddress, size));
    EXPECT_EQ(1u, directSubmission.completionFenceValue);

    ringAllocation->getBufferObjectToModify(0) = originalBo;
}
HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) { HWTEST_F(DrmDirectSubmissionTest, givenTile0AndCompletionFenceSupportWhenSubmittingThenCompletionAddressAndValueArePassedToExec) {
uint64_t gpuAddress = 0x1000; uint64_t gpuAddress = 0x1000;
size_t size = 0x1000; size_t size = 0x1000;