Fix for task count hang issue

Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2022-04-25 20:10:37 +00:00
committed by Compute-Runtime-Automation
parent 56a164ffcd
commit 340ba8bf56
8 changed files with 124 additions and 28 deletions

View File

@ -96,6 +96,8 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
csr->setActivePartitions(partitionCount);
auto ret = csr->submitBatchBuffer(batchBuffer, csr->getResidencyAllocations());
if (ret != NEO::SubmissionStatus::SUCCESS) {
commandStream->getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId());
commandStream->getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount(), csr->getOsContext().getContextId());
return ret;
}

View File

@ -22,6 +22,7 @@
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/residency_container.h"
#include "shared/source/os_interface/hw_info_config.h"
@ -475,27 +476,30 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
csr->setLatestFlushedTaskCount(this->taskCount);
}
csr->makeSurfacePackNonResident(csr->getResidencyAllocations());
ze_result_t retVal = ZE_RESULT_SUCCESS;
if (getSynchronousMode() == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS) {
const auto synchronizeResult = this->synchronize(std::numeric_limits<uint64_t>::max());
if (synchronizeResult == ZE_RESULT_ERROR_DEVICE_LOST) {
return ZE_RESULT_ERROR_DEVICE_LOST;
retVal = ZE_RESULT_ERROR_DEVICE_LOST;
}
} else {
csr->pollForCompletion();
}
this->heapContainer.clear();
csr->pollForCompletion();
if (ret != NEO::SubmissionStatus::SUCCESS) {
if ((ret != NEO::SubmissionStatus::SUCCESS) || (retVal == ZE_RESULT_ERROR_DEVICE_LOST)) {
for (auto &gfx : csr->getResidencyAllocations()) {
gfx->updateTaskCount(csr->peekLatestFlushedTaskCount(), csr->getOsContext().getContextId());
}
if (retVal != ZE_RESULT_ERROR_DEVICE_LOST) {
retVal = ZE_RESULT_ERROR_UNKNOWN;
}
if (ret == NEO::SubmissionStatus::OUT_OF_MEMORY) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
retVal = ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
return ZE_RESULT_ERROR_UNKNOWN;
}
return ZE_RESULT_SUCCESS;
csr->makeSurfacePackNonResident(csr->getResidencyAllocations());
return retVal;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -353,11 +353,15 @@ HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainer
false,
returnValue));
ResidencyContainer container;
commandQueue->submitBatchBuffer(0, container, nullptr, false);
uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount();
uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
EXPECT_EQ(csr->makeResidentCalledTimes, 0u);
EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
EXPECT_EQ(ret, NEO::SubmissionStatus::SUCCESS);
EXPECT_EQ((peekTaskCountBefore + 1), commandQueue->csr->peekTaskCount());
EXPECT_EQ((flushedTaskCountBefore + 1), commandQueue->csr->peekLatestFlushedTaskCount());
EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getResidencyTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
commandQueue->destroy();
}
@ -374,9 +378,14 @@ HWTEST_F(CommandQueueCreate, givenCommandStreamReceiverFailsThenSubmitBatchBuffe
false,
returnValue));
ResidencyContainer container;
uint32_t peekTaskCountBefore = commandQueue->csr->peekTaskCount();
uint32_t flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
EXPECT_EQ(ret, NEO::SubmissionStatus::FAILED);
EXPECT_EQ(peekTaskCountBefore, commandQueue->csr->peekTaskCount());
EXPECT_EQ(flushedTaskCountBefore, commandQueue->csr->peekLatestFlushedTaskCount());
EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
EXPECT_EQ(commandQueue->commandStream->getGraphicsAllocation()->getResidencyTaskCount(commandQueue->csr->getOsContext().getContextId()), commandQueue->csr->peekTaskCount());
commandQueue->destroy();
}
@ -1517,6 +1526,57 @@ HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenExecuteComma
commandList->destroy();
}
// With the mock queue forced to report a FAILED batch-buffer submission, this test
// asserts that executeCommandLists returns ZE_RESULT_ERROR_UNKNOWN and that both
// allocations placed in the command list's residency container report a task count
// of 0 for the CSR's context afterwards (they are seeded with 3 beforehand).
HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenResetGraphicsTaskCounts, IsAtLeastSkl) {
    NEO::CommandStreamReceiver *csr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);

    // Queue whose submitBatchBuffer result is overridden to FAILED.
    ze_command_queue_desc_t queueDesc = {};
    auto failingQueue = new MockCommandQueueSubmitBatchBuffer<gfxCoreFamily>(device, csr, &queueDesc);
    failingQueue->submitBatchBufferResult = NEO::SubmissionStatus::FAILED;
    failingQueue->initialize(false, false);

    auto list = new CommandListCoreFamily<gfxCoreFamily>();
    list->initialize(device, NEO::EngineGroupType::Compute, 0u);
    auto listHandle = list->toHandle();

    // Both allocations wrap the same host buffer; only their task counts matter here.
    void *hostMemory = alignedMalloc(0x100, 0x100);
    NEO::GraphicsAllocation firstAllocation(0, NEO::AllocationType::BUFFER, hostMemory, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u);
    NEO::GraphicsAllocation secondAllocation(0, NEO::AllocationType::BUFFER, hostMemory, 0u, 0u, 1u, MemoryPool::System4KBPages, 1u);

    // Seed a non-zero task count so a reset is observable.
    firstAllocation.updateTaskCount(3, csr->getOsContext().getContextId());
    secondAllocation.updateTaskCount(3, csr->getOsContext().getContextId());
    list->commandContainer.addToResidencyContainer(&firstAllocation);
    list->commandContainer.addToResidencyContainer(&secondAllocation);

    auto executeResult = failingQueue->executeCommandLists(1, &listHandle, nullptr, false);

    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, executeResult);
    EXPECT_EQ(0u, firstAllocation.getTaskCount(csr->getOsContext().getContextId()));
    EXPECT_EQ(0u, secondAllocation.getTaskCount(csr->getOsContext().getContextId()));

    failingQueue->destroy();
    list->destroy();
    alignedFree(hostMemory);
}
// With the mock queue forced to report a FAILED batch-buffer submission, this test
// asserts that executeCommandLists returns ZE_RESULT_ERROR_UNKNOWN and that the CSR's
// latest flushed task count still equals the value set before the call.
HWTEST2_F(ExecuteCommandListTests, givenFailingSubmitBatchBufferThenWaitForCompletionFalse, IsAtLeastSkl) {
    NEO::CommandStreamReceiver *csr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);

    // Queue whose submitBatchBuffer result is overridden to FAILED.
    ze_command_queue_desc_t queueDesc = {};
    auto failingQueue = new MockCommandQueueSubmitBatchBuffer<gfxCoreFamily>(device, csr, &queueDesc);
    failingQueue->submitBatchBufferResult = NEO::SubmissionStatus::FAILED;
    failingQueue->initialize(false, false);

    auto list = new CommandListCoreFamily<gfxCoreFamily>();
    list->initialize(device, NEO::EngineGroupType::Compute, 0u);
    auto listHandle = list->toHandle();

    // Align the latest flushed task count with the current task count as the baseline.
    uint32_t baselineTaskCount = csr->peekTaskCount();
    csr->setLatestFlushedTaskCount(baselineTaskCount);

    auto executeResult = failingQueue->executeCommandLists(1, &listHandle, nullptr, false);

    EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, executeResult);
    EXPECT_EQ(csr->peekLatestFlushedTaskCount(), baselineTaskCount);

    failingQueue->destroy();
    list->destroy();
}
HWTEST2_F(ExecuteCommandListTests, givenSuccessfulSubmitBatchBufferThenExecuteCommandListReturnsSuccess, IsAtLeastSkl) {
ze_command_queue_desc_t desc = {};
NEO::CommandStreamReceiver *csr;

View File

@ -103,6 +103,10 @@ SubmissionStatus CommandStreamReceiver::submitBatchBuffer(BatchBuffer &batchBuff
this->latestSentTaskCount = taskCount + 1;
SubmissionStatus retVal = this->flush(batchBuffer, allocationsForResidency);
if (retVal != NEO::SubmissionStatus::SUCCESS) {
return retVal;
}
if (!isUpdateTagFromWaitEnabled()) {
this->latestFlushedTaskCount = taskCount + 1;
}

View File

@ -246,11 +246,14 @@ int DrmCommandStreamReceiver<GfxFamily>::exec(const BatchBuffer &batchBuffer, ui
// Calls makeBOsResident() on every allocation in inputAllocationsForResidency for the
// given handleId, passing this receiver's residency list.
// Nothing is done when VM bind is available, unless the PassBoundBOToExec debug flag is 1.
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::processResidency(const ResidencyContainer &inputAllocationsForResidency, uint32_t handleId) {
    // Guard clause: VM bind available and the override flag not set to 1 -> skip.
    if (drm->isVmBindAvailable() && (DebugManager.flags.PassBoundBOToExec.get() != 1)) {
        return;
    }

    for (auto &allocation : inputAllocationsForResidency) {
        auto drmAllocation = static_cast<DrmAllocation *>(allocation);
        drmAllocation->makeBOsResident(osContext, handleId, &this->residency, false);
    }
}
template <typename GfxFamily>
void DrmCommandStreamReceiver<GfxFamily>::makeNonResident(GraphicsAllocation &gfxAllocation) {

View File

@ -104,6 +104,7 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::isResident(Device *device
}
MemoryOperationsStatus DrmMemoryOperationsHandlerBind::mergeWithResidencyContainer(OsContext *osContext, ResidencyContainer &residencyContainer) {
if (DebugManager.flags.MakeEachAllocationResident.get() == 2) {
auto memoryManager = static_cast<DrmMemoryManager *>(this->rootDeviceEnvironment.executionEnvironment.memoryManager.get());
@ -117,15 +118,6 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::mergeWithResidencyContain
return retVal;
}
auto clearContainer = true;
if (DebugManager.flags.PassBoundBOToExec.get() != -1) {
clearContainer = !DebugManager.flags.PassBoundBOToExec.get();
}
if (clearContainer) {
residencyContainer.clear();
}
return MemoryOperationsStatus::SUCCESS;
}

View File

@ -197,6 +197,15 @@ class MockCommandStreamReceiverWithOutOfMemorySubmitBatch : public MockCommandSt
}
};
// Mock command stream receiver whose flush() unconditionally returns
// SubmissionStatus::FAILED; construction is forwarded to MockCommandStreamReceiver.
class MockCommandStreamReceiverWithFailingFlush : public MockCommandStreamReceiver {
  public:
    MockCommandStreamReceiverWithFailingFlush(ExecutionEnvironment &executionEnvironment,
                                              uint32_t rootDeviceIndex,
                                              const DeviceBitfield deviceBitfield)
        : MockCommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) {
    }

    // Both arguments are ignored; the result is always FAILED.
    SubmissionStatus flush(BatchBuffer & /*batchBuffer*/, ResidencyContainer & /*allocationsForResidency*/) override {
        return SubmissionStatus::FAILED;
    }
};
template <typename GfxFamily>
class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
public:

View File

@ -492,6 +492,28 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC
executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}
// Submits a batch buffer through a CSR whose flush() always fails and asserts that
// neither the task count nor the latest flushed task count changes.
TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmittingBatchBufferAndFlushFailThenTaskCountIsNotIncremented) {
    MockExecutionEnvironment executionEnvironment;
    executionEnvironment.prepareRootDeviceEnvironments(1);
    executionEnvironment.initializeMemoryManager();

    DeviceBitfield deviceBitfield(1);
    MockCommandStreamReceiverWithFailingFlush csr(executionEnvironment, 0, deviceBitfield);

    // One page of memory backs the command stream used to build the batch buffer.
    GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
        MockAllocationProperties{csr.getRootDeviceIndex(), MemoryConstants::pageSize});
    ASSERT_NE(nullptr, commandBuffer);
    LinearStream cs(commandBuffer);

    BatchBuffer batchBuffer{cs.getGraphicsAllocation(),
                            0,
                            0,
                            nullptr,
                            false,
                            false,
                            QueueThrottle::MEDIUM,
                            QueueSliceCount::defaultSliceCount,
                            cs.getUsed(),
                            &cs,
                            nullptr,
                            false};
    ResidencyContainer residencyList;

    // Snapshot taken before the submission; both counters are compared against it.
    auto expectedTaskCount = csr.peekTaskCount();
    csr.submitBatchBuffer(batchBuffer, residencyList);

    EXPECT_EQ(expectedTaskCount, csr.peekTaskCount());
    EXPECT_EQ(expectedTaskCount, csr.peekLatestFlushedTaskCount());

    executionEnvironment.memoryManager->freeGraphicsMemoryImpl(commandBuffer);
}
HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) {
DebugManagerStateRestore restorer;
DebugManager.flags.UpdateTaskCountFromWait.set(3);