refactor: unify immediate cmd list wait methods

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-08-09 13:02:46 +00:00 committed by Compute-Runtime-Automation
parent 77319f886a
commit d5565a68e2
5 changed files with 37 additions and 41 deletions

View File

@ -8,6 +8,7 @@
#pragma once #pragma once
#include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/task_count_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h"
@ -184,6 +185,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
void printKernelsPrintfOutput(bool hangDetected); void printKernelsPrintfOutput(bool hangDetected);
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const; ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const; bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override; void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
void handleInOrderDependencyCounter(); void handleInOrderDependencyCounter();

View File

@ -380,17 +380,10 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
} }
ze_result_t status = ZE_RESULT_SUCCESS;
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) { if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; status = hostSynchronize(std::numeric_limits<uint64_t>::max(), completionStamp.taskCount, true);
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) {
this->printKernelsPrintfOutput(true);
this->checkAssert();
return ZE_RESULT_ERROR_DEVICE_LOST;
}
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
this->printKernelsPrintfOutput(false);
this->checkAssert();
} }
this->cmdListCurrentStartOffset = commandStream->getUsed(); this->cmdListCurrentStartOffset = commandStream->getUsed();
@ -402,7 +395,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
this->device->getNEODevice()->debugExecutionCounter++; this->device->getNEODevice()->debugExecutionCounter++;
} }
return ZE_RESULT_SUCCESS; return status;
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
@ -852,33 +845,39 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
} }
// hostSynchronize: waits for this immediate command list's work and optionally runs post-wait handling.
// NOTE: this is a side-by-side diff view; lines that show two copies of a statement pair the
// pre-change text (first) with the post-change text (second).
//
// Post-change signature: hostSynchronize(timeout, taskCount, handlePostWaitOperations).
//   timeout  - divided by 1000 to obtain the value named timeoutInMicroSeconds; when equal to
//              std::numeric_limits<uint64_t>::max() the wait is flagged as indefinite polling.
//   taskCount - task count passed to csr->waitForCompletionWithTimeout and to cleanAllocationList
//              (pre-change code used csr->peekTaskCount() stored in syncTaskCount instead).
//   handlePostWaitOperations - when true, post-wait handling runs: on success the CSR client is
//              unregistered and temporary allocations are cleaned, then kernel printf output is
//              printed (with the hang flag set on ZE_RESULT_ERROR_DEVICE_LOST) and checkAssert() is called.
// Returns the status from synchronizeInOrderExecution(timeout) when in-order execution is enabled;
// otherwise ZE_RESULT_ERROR_DEVICE_LOST if the wait reports NEO::WaitStatus::GpuHang, else ZE_RESULT_SUCCESS.
// Behavioural difference visible in this hunk: pre-change, the CSR wait happened only when
// isFlushTaskSubmissionEnabled && !isSyncModeQueue, and printf/checkAssert always ran; post-change,
// the CSR wait runs whenever in-order execution is not enabled.
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) { ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) {
auto syncTaskCount = this->csr->peekTaskCount();
ze_result_t status = ZE_RESULT_SUCCESS; ze_result_t status = ZE_RESULT_SUCCESS;
if (isInOrderExecutionEnabled()) { if (isInOrderExecutionEnabled()) {
status = synchronizeInOrderExecution(timeout); status = synchronizeInOrderExecution(timeout);
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) { } else {
const int64_t timeoutInMicroSeconds = timeout / 1000; const int64_t timeoutInMicroSeconds = timeout / 1000;
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max(); const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds}, const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds},
syncTaskCount); taskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) { if (waitStatus == NEO::WaitStatus::GpuHang) {
status = ZE_RESULT_ERROR_DEVICE_LOST; status = ZE_RESULT_ERROR_DEVICE_LOST;
} }
} }
if (status == ZE_RESULT_SUCCESS) { if (handlePostWaitOperations) {
this->cmdQImmediate->unregisterCsrClient(); if (status == ZE_RESULT_SUCCESS) {
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION); this->cmdQImmediate->unregisterCsrClient();
this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
}
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
this->checkAssert();
} }
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
this->checkAssert();
return status; return status;
} }
// Single-argument hostSynchronize (added on the post-change side of this diff).
// Forwards to the three-argument overload, passing the CSR's current task count
// (csr->peekTaskCount()) and handlePostWaitOperations = true, and returns its result.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
return hostSynchronize(timeout, this->csr->peekTaskCount(), true);
}
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) { bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
@ -1040,22 +1039,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr; const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr; void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
if (this->dependenciesPresent) { if (this->dependenciesPresent || isInOrderExecutionEnabled()) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; auto waitStatus = hostSynchronize(std::numeric_limits<uint64_t>::max(), this->csr->peekTaskCount(), false);
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
if (waitStatus == NEO::WaitStatus::GpuHang) { if (waitStatus != ZE_RESULT_SUCCESS) {
return ZE_RESULT_ERROR_DEVICE_LOST; return waitStatus;
} }
this->dependenciesPresent = false; this->dependenciesPresent = false;
} }
if (isInOrderExecutionEnabled()) {
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
if (status != ZE_RESULT_SUCCESS) {
return status;
}
}
if (signalEvent) { if (signalEvent) {
CommandListImp::addToMappedEventList(signalEvent); CommandListImp::addToMappedEventList(signalEvent);
CommandListImp::storeReferenceTsToMappedEvents(true); CommandListImp::storeReferenceTsToMappedEvents(true);

View File

@ -1252,7 +1252,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingDi
HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) { HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) {
ze_command_queue_desc_t desc = {}; ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue; ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
@ -1276,7 +1276,7 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) { HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
ze_command_queue_desc_t desc = {}; ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue; ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
@ -1345,7 +1345,7 @@ HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnreg
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0); DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
ze_command_queue_desc_t desc = {}; ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue; ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);
@ -1388,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
ze_command_queue_desc_t desc = {}; ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue; ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList); ASSERT_NE(nullptr, commandList);

View File

@ -2966,7 +2966,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskEnabledAndNotSyncMo
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
} }
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsNotCalled, IsAtLeastSkl) { HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsCalled, IsAtLeastSkl) {
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver); auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
auto cmdList = createCmdList<gfxCoreFamily>(csr); auto cmdList = createCmdList<gfxCoreFamily>(csr);
@ -2975,10 +2975,10 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletio
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS); EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled; uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
} }
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsNotCalled, IsAtLeastSkl) { HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsCalled, IsAtLeastSkl) {
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver); auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
auto cmdList = createCmdList<gfxCoreFamily>(csr); auto cmdList = createCmdList<gfxCoreFamily>(csr);
@ -2988,7 +2988,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisable
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS); EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled; uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
} }
HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) { HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) {

View File

@ -673,6 +673,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
CommandListAppendLaunchKernel::SetUp(); CommandListAppendLaunchKernel::SetUp();
createKernel(); createKernel();
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.flags.usesPrintf = false;
} }
void TearDown() override { void TearDown() override {