diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 06d97e8f0b..29e3bff51d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/csr_definitions.h" +#include "shared/source/command_stream/task_count_helper.h" #include "level_zero/core/source/cmdlist/cmdlist_hw.h" @@ -184,6 +185,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::executeCommand return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; } + ze_result_t status = ZE_RESULT_SUCCESS; + if (this->isSyncModeQueue || this->printfKernelContainer.size() > 0u) { - auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; - const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount); - if (waitStatus == NEO::WaitStatus::GpuHang) { - this->printKernelsPrintfOutput(true); - this->checkAssert(); - return ZE_RESULT_ERROR_DEVICE_LOST; - } - csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION); - this->printKernelsPrintfOutput(false); - this->checkAssert(); + status = hostSynchronize(std::numeric_limits::max(), completionStamp.taskCount, true); } this->cmdListCurrentStartOffset = commandStream->getUsed(); @@ -402,7 +395,7 @@ inline ze_result_t CommandListCoreFamilyImmediate::executeCommand this->device->getNEODevice()->debugExecutionCounter++; } - return ZE_RESULT_SUCCESS; + return status; } template @@ -852,33 +845,39 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati } template -ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint64_t timeout) { - auto syncTaskCount = this->csr->peekTaskCount(); +ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations) { ze_result_t status = ZE_RESULT_SUCCESS; if (isInOrderExecutionEnabled()) { status = synchronizeInOrderExecution(timeout); - } else if (this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) { + } else { const int64_t timeoutInMicroSeconds = timeout / 1000; const auto indefinitelyPoll = timeout == std::numeric_limits::max(); const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds}, - syncTaskCount); + taskCount); if (waitStatus == NEO::WaitStatus::GpuHang) { status = ZE_RESULT_ERROR_DEVICE_LOST; } } - if (status == ZE_RESULT_SUCCESS) { - this->cmdQImmediate->unregisterCsrClient(); - this->csr->getInternalAllocationStorage()->cleanAllocationList(syncTaskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION); + if (handlePostWaitOperations) { + if (status == ZE_RESULT_SUCCESS) { + this->cmdQImmediate->unregisterCsrClient(); + this->csr->getInternalAllocationStorage()->cleanAllocationList(taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION); + } + + this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST); + this->checkAssert(); } - this->printKernelsPrintfOutput(status == ZE_RESULT_ERROR_DEVICE_LOST); - this->checkAssert(); - return status; } +template +ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint64_t timeout) { + return hostSynchronize(timeout, this->csr->peekTaskCount(), true); +} + template ze_result_t CommandListCoreFamilyImmediate::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) { @@ -1040,22 +1039,15 @@ ze_result_t CommandListCoreFamilyImmediate::performCpuMemcpy(cons const void *cpuMemcpySrcPtr = srcLockPointer ? srcLockPointer : cpuMemCopyInfo.srcPtr; void *cpuMemcpyDstPtr = dstLockPointer ? dstLockPointer : cpuMemCopyInfo.dstPtr; - if (this->dependenciesPresent) { - auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; - const auto waitStatus = this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); - if (waitStatus == NEO::WaitStatus::GpuHang) { - return ZE_RESULT_ERROR_DEVICE_LOST; + if (this->dependenciesPresent || isInOrderExecutionEnabled()) { + auto waitStatus = hostSynchronize(std::numeric_limits::max(), this->csr->peekTaskCount(), false); + + if (waitStatus != ZE_RESULT_SUCCESS) { + return waitStatus; } this->dependenciesPresent = false; } - if (isInOrderExecutionEnabled()) { - auto status = synchronizeInOrderExecution(std::numeric_limits::max()); - if (status != ZE_RESULT_SUCCESS) { - return status; - } - } - if (signalEvent) { CommandListImp::addToMappedEventList(signalEvent); CommandListImp::storeReferenceTsToMappedEvents(true); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index af30651da7..1f7491dfd8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1252,7 +1252,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingDi HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHpcCore) { ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); @@ -1276,7 +1276,7 @@ HWTEST2_F(CommandListCreate, whenDispatchingThenPassNumCsrClients, IsAtLeastXeHp HWTEST_F(CommandListCreate, givenSignalEventWhenCallingSynchronizeThenUnregisterClient) { ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); @@ -1345,7 +1345,7 @@ HWTEST_F(CommandListCreate, givenDebugFlagSetWhenCallingSynchronizeThenDontUnreg DebugManager.flags.TrackNumCsrClientsOnSyncPoints.set(0); ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); @@ -1388,7 +1388,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); ze_command_queue_desc_t desc = {}; - desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); ASSERT_NE(nullptr, commandList); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index a3b4cd7b0c..392537b887 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -2966,7 +2966,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskEnabledAndNotSyncMo EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); } -HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsNotCalled, IsAtLeastSkl) { +HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletionIsCalled, IsAtLeastSkl) { auto csr = static_cast *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver); auto cmdList = createCmdList(csr); @@ -2975,10 +2975,10 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenSyncModeThenWaitForCompletio EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS); uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled; - EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); + EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); } -HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsNotCalled, IsAtLeastSkl) { +HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisabledThenWaitForCompletionIsCalled, IsAtLeastSkl) { auto csr = static_cast *>(device->getNEODevice()->getInternalEngine().commandStreamReceiver); auto cmdList = createCmdList(csr); @@ -2988,7 +2988,7 @@ HWTEST2_F(ImmediateCommandListHostSynchronize, givenFlushTaskSubmissionIsDisable EXPECT_EQ(cmdList->hostSynchronize(0), ZE_RESULT_SUCCESS); uint32_t waitForFlushTagUpdateCalled = csr->waitForCompletionWithTimeoutTaskCountCalled; - EXPECT_EQ(waitForFlushTagUpdateCalled, 0u); + EXPECT_EQ(waitForFlushTagUpdateCalled, 1u); } HWTEST2_F(ImmediateCommandListHostSynchronize, givenGpuStatusIsHangThenDeviceLostIsReturned, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 62a79cdf1c..8871c200f6 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -673,6 +673,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { CommandListAppendLaunchKernel::SetUp(); createKernel(); + + const_cast(kernel->getKernelDescriptor()).kernelAttributes.flags.usesPrintf = false; } void TearDown() override {