refactor: unify immediate cmd list wait methods
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
77319f886a
commit
d5565a68e2
|
@ -8,6 +8,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "shared/source/command_stream/csr_definitions.h"
|
||||
#include "shared/source/command_stream/task_count_helper.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
||||
|
||||
|
@ -184,6 +185,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
|
||||
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
|
||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||
void handleInOrderDependencyCounter();
|
||||
|
|
|
@ -380,17 +380,10 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
|||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
|
||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||
this->printKernelsPrintfOutput(true);
|
||||
this->checkAssert();
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
this->printKernelsPrintfOutput(false);
|
||||
this->checkAssert();
|
||||
status = hostSynchronize(std::numeric_limits<uint64_t>::max(), completionStamp.taskCount, true);
|
||||
}
|
||||
|
||||
this->cmdListCurrentStartOffset = commandStream->getUsed();
|
||||
|
@ -402,7 +395,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
|||
this->device->getNEODevice()->debugExecutionCounter++;
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
return status;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -852,33 +845,39 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
|||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
|
||||
auto syncTaskCount = this->csr->peekTaskCount();
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) {
|
||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
status = synchronizeInOrderExecution(timeout);
|
||||
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
||||
} else {
|
||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
|
||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds},
|
||||
syncTaskCount);
|
||||
taskCount);
|
||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
}
|
||||
|
||||
if (status == ZE_RESULT_SUCCESS) {
|
||||
this->cmdQImmediate->unregisterCsrClient();
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
if (handlePostWaitOperations) {
|
||||
if (status == ZE_RESULT_SUCCESS) {
|
||||
this->cmdQImmediate->unregisterCsrClient();
|
||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
||||
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
|
||||
this->checkAssert();
|
||||
}
|
||||
|
||||
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
|
||||
this->checkAssert();
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Single-argument overload of hostSynchronize.
// Forwards to the three-argument overload, passing the value returned by
// this->csr->peekTaskCount() as the task count to wait on and `true` for
// handlePostWaitOperations. The result of that call is returned unchanged.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
|
||||
return hostSynchronize(timeout, this->csr->peekTaskCount(), true);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||
|
@ -1040,22 +1039,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
|||
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
||||
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
||||
|
||||
if (this->dependenciesPresent) {
|
||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
|
||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
if (this->dependenciesPresent || isInOrderExecutionEnabled()) {
|
||||
auto waitStatus = hostSynchronize(std::numeric_limits<uint64_t>::max(), this->csr->peekTaskCount(), false);
|
||||
|
||||
if (waitStatus != ZE_RESULT_SUCCESS) {
|
||||
return waitStatus;
|
||||
}
|
||||
this->dependenciesPresent = false;
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
|
||||
if (status != ZE_RESULT_SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
if (signalEvent) {
|
||||
CommandListImp::addToMappedEventList(signalEvent);
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
|
|
|
@ -1252,7 +1252,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingDi
|
|||
|
||||
HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) {
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
@ -1276,7 +1276,7 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
|
|||
|
||||
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
@ -1345,7 +1345,7 @@ HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnreg
|
|||
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
|
||||
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
@ -1388,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
|||
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
|
|
@ -2966,7 +2966,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskEnabledAndNotSyncMo
|
|||
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||
}
|
||||
|
||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
|
||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsCalled, IsAtLeastSkl) {
|
||||
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
||||
|
||||
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
||||
|
@ -2975,10 +2975,10 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletio
|
|||
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
||||
|
||||
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
|
||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||
}
|
||||
|
||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
|
||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsCalled, IsAtLeastSkl) {
|
||||
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
||||
|
||||
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
||||
|
@ -2988,7 +2988,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisable
|
|||
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
||||
|
||||
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
|
||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||
}
|
||||
|
||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) {
|
||||
|
|
|
@ -673,6 +673,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
|||
|
||||
CommandListAppendLaunchKernel::SetUp();
|
||||
createKernel();
|
||||
|
||||
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.flags.usesPrintf = false;
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
|
|
Loading…
Reference in New Issue