refactor: unify immediate cmd list wait methods
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
77319f886a
commit
d5565a68e2
|
@ -8,6 +8,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "shared/source/command_stream/csr_definitions.h"
|
#include "shared/source/command_stream/csr_definitions.h"
|
||||||
|
#include "shared/source/command_stream/task_count_helper.h"
|
||||||
|
|
||||||
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
#include "level_zero/core/source/cmdlist/cmdlist_hw.h"
|
||||||
|
|
||||||
|
@ -184,6 +185,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||||
|
|
||||||
void printKernelsPrintfOutput(bool hangDetected);
|
void printKernelsPrintfOutput(bool hangDetected);
|
||||||
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
ze_result_t synchronizeInOrderExecution(uint64_t timeout) const;
|
||||||
|
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
|
||||||
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
|
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
|
||||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||||
void handleInOrderDependencyCounter();
|
void handleInOrderDependencyCounter();
|
||||||
|
|
|
@ -380,17 +380,10 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||||
|
|
||||||
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
|
if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) {
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
status = hostSynchronize(std::numeric_limits<uint64_t>::max(), completionStamp.taskCount, true);
|
||||||
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
|
|
||||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
|
||||||
this->printKernelsPrintfOutput(true);
|
|
||||||
this->checkAssert();
|
|
||||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
|
||||||
}
|
|
||||||
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
|
||||||
this->printKernelsPrintfOutput(false);
|
|
||||||
this->checkAssert();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this->cmdListCurrentStartOffset = commandStream->getUsed();
|
this->cmdListCurrentStartOffset = commandStream->getUsed();
|
||||||
|
@ -402,7 +395,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||||
this->device->getNEODevice()->debugExecutionCounter++;
|
this->device->getNEODevice()->debugExecutionCounter++;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
@ -852,33 +845,39 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) {
|
||||||
auto syncTaskCount = this->csr->peekTaskCount();
|
|
||||||
ze_result_t status = ZE_RESULT_SUCCESS;
|
ze_result_t status = ZE_RESULT_SUCCESS;
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
if (isInOrderExecutionEnabled()) {
|
||||||
status = synchronizeInOrderExecution(timeout);
|
status = synchronizeInOrderExecution(timeout);
|
||||||
} else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) {
|
} else {
|
||||||
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
const int64_t timeoutInMicroSeconds = timeout / 1000;
|
||||||
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
|
const auto indefinitelyPoll = timeout == std::numeric_limits<uint64_t>::max();
|
||||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds},
|
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds},
|
||||||
syncTaskCount);
|
taskCount);
|
||||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
||||||
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
status = ZE_RESULT_ERROR_DEVICE_LOST;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (status == ZE_RESULT_SUCCESS) {
|
if (handlePostWaitOperations) {
|
||||||
this->cmdQImmediate->unregisterCsrClient();
|
if (status == ZE_RESULT_SUCCESS) {
|
||||||
this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
this->cmdQImmediate->unregisterCsrClient();
|
||||||
|
this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
|
||||||
|
}
|
||||||
|
|
||||||
|
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
|
||||||
|
this->checkAssert();
|
||||||
}
|
}
|
||||||
|
|
||||||
this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST);
|
|
||||||
this->checkAssert();
|
|
||||||
|
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::hostSynchronize(uint64_t timeout) {
|
||||||
|
return hostSynchronize(timeout, this->csr->peekTaskCount(), true);
|
||||||
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||||
|
@ -1040,22 +1039,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::performCpuMemcpy(cons
|
||||||
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr;
|
||||||
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr;
|
||||||
|
|
||||||
if (this->dependenciesPresent) {
|
if (this->dependenciesPresent || isInOrderExecutionEnabled()) {
|
||||||
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
|
auto waitStatus = hostSynchronize(std::numeric_limits<uint64_t>::max(), this->csr->peekTaskCount(), false);
|
||||||
const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount());
|
|
||||||
if (waitStatus == NEO::WaitStatus::GpuHang) {
|
if (waitStatus != ZE_RESULT_SUCCESS) {
|
||||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
return waitStatus;
|
||||||
}
|
}
|
||||||
this->dependenciesPresent = false;
|
this->dependenciesPresent = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isInOrderExecutionEnabled()) {
|
|
||||||
auto status = synchronizeInOrderExecution(std::numeric_limits<uint64_t>::max());
|
|
||||||
if (status != ZE_RESULT_SUCCESS) {
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (signalEvent) {
|
if (signalEvent) {
|
||||||
CommandListImp::addToMappedEventList(signalEvent);
|
CommandListImp::addToMappedEventList(signalEvent);
|
||||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||||
|
|
|
@ -1252,7 +1252,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingDi
|
||||||
|
|
||||||
HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) {
|
HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) {
|
||||||
ze_command_queue_desc_t desc = {};
|
ze_command_queue_desc_t desc = {};
|
||||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||||
ze_result_t returnValue;
|
ze_result_t returnValue;
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
@ -1276,7 +1276,7 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp
|
||||||
|
|
||||||
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
|
HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) {
|
||||||
ze_command_queue_desc_t desc = {};
|
ze_command_queue_desc_t desc = {};
|
||||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||||
ze_result_t returnValue;
|
ze_result_t returnValue;
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
@ -1345,7 +1345,7 @@ HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnreg
|
||||||
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
|
DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0);
|
||||||
|
|
||||||
ze_command_queue_desc_t desc = {};
|
ze_command_queue_desc_t desc = {};
|
||||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||||
ze_result_t returnValue;
|
ze_result_t returnValue;
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
@ -1388,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||||
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||||
|
|
||||||
ze_command_queue_desc_t desc = {};
|
ze_command_queue_desc_t desc = {};
|
||||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||||
ze_result_t returnValue;
|
ze_result_t returnValue;
|
||||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
ASSERT_NE(nullptr, commandList);
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
|
@ -2966,7 +2966,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskEnabledAndNotSyncMo
|
||||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
|
HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsCalled, IsAtLeastSkl) {
|
||||||
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
||||||
|
|
||||||
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
||||||
|
@ -2975,10 +2975,10 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletio
|
||||||
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
||||||
|
|
||||||
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
||||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
|
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsNotCalled, IsAtLeastSkl) {
|
HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsCalled, IsAtLeastSkl) {
|
||||||
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
auto csr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver);
|
||||||
|
|
||||||
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
auto cmdList = createCmdList<gfxCoreFamily>(csr);
|
||||||
|
@ -2988,7 +2988,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisable
|
||||||
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS);
|
||||||
|
|
||||||
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled;
|
||||||
EXPECT_EQ(waitForFlushTagUpdateCalled, 0u);
|
EXPECT_EQ(waitForFlushTagUpdateCalled, 1u);
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) {
|
HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) {
|
||||||
|
|
|
@ -673,6 +673,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||||
|
|
||||||
CommandListAppendLaunchKernel::SetUp();
|
CommandListAppendLaunchKernel::SetUp();
|
||||||
createKernel();
|
createKernel();
|
||||||
|
|
||||||
|
const_cast<KernelDescriptor &>(kernel->getKernelDescriptor()).kernelAttributes.flags.usesPrintf = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TearDown() override {
|
void TearDown() override {
|
||||||
|
|
Loading…
Reference in New Issue