refactor: unify immediate cmd list wait methods

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-08-09 13:02:46 +00:00 committed by Compute-Runtime-Automation
parent 77319f886a
commit d5565a68e2
5 changed files with 37 additions and 41 deletions

View File

@ -8,6 +8,7 @@
#pragma once
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/task_count_helper.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
@ -184,6 +185,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
void printKernelsPrintfOutput(bool hangDetected);
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
void handleInOrderDependencyCounter();

View File

@ -380,17 +380,10 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
}
ze_result_t status = ZE_RESULT_SUCCESS;
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) {
this->printKernelsPrintfOutput(true);
this->checkAssert();
return ZE_RESULT_ERROR_DEVICE_LOST;
}
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
this->printKernelsPrintfOutput(false);
this->checkAssert();
status = hostSynchronize(std::numeric_limits<uint64_t>::max(), completionStamp.taskCount, true);
}
this->cmdListCurrentStartOffset = commandStream->getUsed();
@ -402,7 +395,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
this->device->getNEODevice()->debugExecutionCounter++;
}
return ZE_RESULT_SUCCESS;
return status;
}
template <GFXCORE_FAMILY gfxCoreFamily>
@ -852,33 +845,39 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
}
// Blocks the host until the work identified by a task count has completed, then
// optionally runs post-wait housekeeping.
//
// NOTE(review): this listing is a rendered diff whose +/- markers were lost, so
// pre-change and post-change lines are interleaved. The one-argument signature,
// `syncTaskCount`, the `else if (isFlushTaskSubmissionEnabled ...)` arm, and the
// unconditional post-wait statements appear to be the superseded (removed) lines;
// the three-argument signature, `taskCount`, the plain `else`, and the
// `handlePostWaitOperations` block appear to be their replacements - confirm
// against the real file before relying on this.
//
// Parameters (three-argument form):
//   timeout                  - wait budget; divided by 1000 to obtain microseconds,
//                              so presumably nanoseconds - TODO confirm. The value
//                              std::numeric_limits<uint64_t>::max() selects
//                              indefinite polling.
//   taskCount                - task count passed to the CSR wait and to the
//                              temporary-allocation cleanup.
//   handlePostWaitOperations - when true, runs client unregistration, allocation
//                              cleanup, printf output and assert checking after the wait.
//
// Returns the status from synchronizeInOrderExecution() on the in-order path;
// otherwise ZE_RESULT_ERROR_DEVICE_LOST when the CSR wait reports GpuHang, and
// ZE_RESULT_SUCCESS for any other wait status.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
auto syncTaskCount = this->csr->peekTaskCount();
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) {
ze_result_t status = ZE_RESULT_SUCCESS;
// In-order command lists use their own synchronization routine; the CSR wait below is skipped for them.
if (isInOrderExecutionEnabled()) {
status = synchronizeInOrderExecution(timeout);
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
} else {
const int64_t timeoutInMicroSeconds = timeout / 1000;
// The maximum timeout value is treated as "wait forever".
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
// NOTE(review): the meaning of the first two WaitParams fields is not visible here;
// they receive indefinitelyPoll and its negation respectively - confirm field names.
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds},
syncTaskCount);
taskCount);
// Only a GPU hang is turned into an error; every other wait status leaves status as success.
if (waitStatus == NEO::WaitStatus::GpuHang) {
status = ZE_RESULT_ERROR_DEVICE_LOST;
}
}
if (status == ZE_RESULT_SUCCESS) {
this->cmdQImmediate->unregisterCsrClient();
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
// Post-wait housekeeping is opt-in so callers can wait without triggering it.
if (handlePostWaitOperations) {
// Client unregistration and temporary-allocation cleanup happen only after a successful wait.
if (status == ZE_RESULT_SUCCESS) {
this->cmdQImmediate->unregisterCsrClient();
this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
}
// Printf output and assert checking run on both success and failure; the flag passed
// to printKernelsPrintfOutput is true only when the device was reported lost.
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
this->checkAssert();
}
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
this->checkAssert();
return status;
}
// Single-argument overload: waits on the task count currently reported by the
// CSR and always requests post-wait operations from the three-argument overload.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
    const auto latestTaskCount = this->csr->peekTaskCount();
    constexpr bool runPostWaitOperations = true;

    return hostSynchronize(timeout, latestTaskCount, runPostWaitOperations);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
@ -1040,22 +1039,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
if (this->dependenciesPresent) {
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
if (waitStatus == NEO::WaitStatus::GpuHang) {
return ZE_RESULT_ERROR_DEVICE_LOST;
if (this->dependenciesPresent || isInOrderExecutionEnabled()) {
auto waitStatus = hostSynchronize(std::numeric_limits<uint64_t>::max(), this->csr->peekTaskCount(), false);
if (waitStatus != ZE_RESULT_SUCCESS) {
return waitStatus;
}
this->dependenciesPresent = false;
}
if (isInOrderExecutionEnabled()) {
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
if (status != ZE_RESULT_SUCCESS) {
return status;
}
}
if (signalEvent) {
CommandListImp::addToMappedEventList(signalEvent);
CommandListImp::storeReferenceTsToMappedEvents(true);

View File

@ -1252,7 +1252,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingDi
HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList);
@ -1276,7 +1276,7 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList);
@ -1345,7 +1345,7 @@ HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnreg
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList);
@ -1388,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList);

View File

@ -2966,7 +2966,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskEnabledAndNotSyncMo
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
}
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsCalled, IsAtLeastSkl) {
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
auto cmdList = createCmdList<gfxCoreFamily>(csr);
@ -2975,10 +2975,10 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletio
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
}
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsCalled, IsAtLeastSkl) {
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
auto cmdList = createCmdList<gfxCoreFamily>(csr);
@ -2988,7 +2988,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisable
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
}
HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) {

View File

@ -673,6 +673,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
CommandListAppendLaunchKernel::SetUp();
createKernel();
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.flags.usesPrintf = false;
}
void TearDown() override {