diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp index 70f6e723f1..bfa2c9306d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue.cpp @@ -249,7 +249,7 @@ HWTEST_F(CommandQueueCreate, givenUpdateTaskCountFromWaitWhenDispatchTaskCountWr using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); const ze_command_queue_desc_t desc = {}; ze_result_t returnValue; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 5ab856c8bc..87d9fab950 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -955,6 +955,58 @@ void CommandQueue::aubCaptureHook(bool &blocking, bool &clearAllDependencies, co } } +bool CommandQueue::isTimestampWaitEnabled() { + auto enabled = false; + + switch (DebugManager.flags.EnableTimestampWait.get()) { + case 0: + enabled = false; + break; + case 1: + enabled = getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled(); + break; + case 2: + enabled = getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled(); + break; + case 3: + enabled = getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled(); + break; + case 4: + enabled = true; + break; + } + + return enabled; +} + +void CommandQueue::waitForTimestamps(uint32_t taskCount) { + if (isTimestampWaitEnabled()) { + bool waited = false; + + for (const auto &timestamp : timestampPacketContainer->peekNodes()) { + for (uint32_t i = 0; i < timestamp->getPacketsUsed(); i++) { + while (timestamp->getContextEndValue(i) == 1) { + } + waited = true; + } + } + + if (isOOQEnabled()) { + for (const auto &timestamp : deferredTimestampPackets->peekNodes()) { + for (uint32_t 
i = 0; i < timestamp->getPacketsUsed(); i++) { + while (timestamp->getContextEndValue(i) == 1) { + } + waited = true; + } + } + } + + if (waited) { + getGpgpuCommandStreamReceiver().updateTagFromCpu(taskCount); + } + } +} + void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) { if (blockedQueue) { while (isQueueBlocked()) { @@ -963,6 +1015,7 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan TimestampPacketContainer nodesToRelease; if (deferredTimestampPackets) { + waitForTimestamps(taskCount); deferredTimestampPackets->swapNodes(nodesToRelease); } diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 68c2f68308..7f592bcb4d 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -215,6 +215,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { bool isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState) const; + bool isTimestampWaitEnabled(); + void waitForTimestamps(uint32_t taskCount); + MOCKABLE_VIRTUAL bool isQueueBlocked(); MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList); diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index b6a850ee9a..e15a985ae3 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -138,6 +138,37 @@ TEST(CommandQueue, WhenConstructingCommandQueueThenQueueFamilyIsNotSelected) { EXPECT_FALSE(cmdQ.isQueueFamilySelected()); } +TEST(CommandQueue, givenEnableTimestampWaitWhenCheckIsTimestampWaitEnabledThenReturnProperValue) { + DebugManagerStateRestore restorer; + auto mockDevice = 
std::make_unique(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get())); + MockCommandQueue cmdQ(nullptr, mockDevice.get(), 0, false); + + { + DebugManager.flags.EnableTimestampWait.set(0); + EXPECT_FALSE(cmdQ.isTimestampWaitEnabled()); + } + + { + DebugManager.flags.EnableTimestampWait.set(1); + EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled()); + } + + { + DebugManager.flags.EnableTimestampWait.set(2); + EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isDirectSubmissionEnabled()); + } + + { + DebugManager.flags.EnableTimestampWait.set(3); + EXPECT_EQ(cmdQ.isTimestampWaitEnabled(), cmdQ.getGpgpuCommandStreamReceiver().isAnyDirectSubmissionEnabled()); + } + + { + DebugManager.flags.EnableTimestampWait.set(4); + EXPECT_TRUE(cmdQ.isTimestampWaitEnabled()); + } +} + struct GetTagTest : public ClDeviceFixture, public CommandQueueFixture, public CommandStreamFixture, diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 17e9e5e2b1..7248431de4 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -126,7 +126,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyQueueWhenFinishingThenTa HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTaskCountToWaitBiggerThanLatestSentTaskCountWhenWaitForCompletionThenFlushPipeControl) { typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp 
b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index f703f18cc3..8557e57e0c 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -532,7 +532,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenFlushTas HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitWhenFlushBatchedIsCalledThenFlushedTaskCountIsNotModifed) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); auto mockCsr = new MockCsrHw2(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); pDevice->resetCommandStreamReceiver(mockCsr); @@ -1021,7 +1021,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrd HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenThereIsNoPipeControlForUpdateTaskCount) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); auto &commandStream = commandQueue.getCS(4096u); @@ -1053,7 +1053,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenPipeControlIsFlushed) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); CommandQueueHw commandQueue(nullptr, pClDevice, 0, false); commandQueue.taskCount = 10; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp 
index f1f63d1d88..021d98aca9 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -917,7 +917,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsWhenFlushingTagUpdateThenExpectTagUpdatePipeControlWithPartitionFlagOnAndActivePartitionConfig) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { @@ -956,8 +956,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTile verifyActivePartitionConfig(commandStreamReceiver, false); } -HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, - givenMultipleStaticActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfigAtSecondFlush) { +HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests, givenMultipleStaticActivePartitionsWhenFlushingTaskTwiceThenExpectTagUpdatePipeControlWithPartitionFlagOnAndNoActivePartitionConfigAtSecondFlush) { + DebugManagerStateRestore restorer; + DebugManager.flags.UpdateTaskCountFromWait.set(3); + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); if (pDevice->getPreemptionMode() == PreemptionMode::MidThread || pDevice->isDebuggerActive()) { commandStreamReceiver.createPreemptionAllocation(); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index 
9f16e959e6..0579547466 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -351,7 +351,7 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) { HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTaskCounts) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 8e806208be..7bfc102fdb 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -768,6 +768,95 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); } +HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishWithoutEnqueueThenWaitOnTimestampAndDoNotUpdateTagFromCpu) { + DebugManagerStateRestore restorer; + DebugManager.flags.UpdateTaskCountFromWait.set(3); + DebugManager.flags.EnableTimestampWait.set(1); + + device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + auto cmdQ = std::make_unique>(context, device.get(), nullptr); + + const auto &csr = cmdQ->getGpgpuCommandStreamReceiver(); + auto taskCount = *csr.getTagAddress(); + auto latestFlushedTaskCount = csr.peekLatestFlushedTaskCount(); + + TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); + TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); + + EXPECT_EQ(0u, deferredTimestampPackets->peekNodes().size()); + EXPECT_EQ(0u, timestampPacketContainer->peekNodes().size()); + + cmdQ->finish(); + + EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 
latestFlushedTaskCount); + EXPECT_EQ(*csr.getTagAddress(), taskCount); +} + +HWTEST_F(TimestampPacketTests, givenEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) { + DebugManagerStateRestore restorer; + DebugManager.flags.UpdateTaskCountFromWait.set(3); + DebugManager.flags.EnableTimestampWait.set(1); + + device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + auto cmdQ = std::make_unique>(context, device.get(), nullptr); + + TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); + TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); + + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + cmdQ->flush(); + + EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); + EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); + + typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2}; + for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { + deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); + timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); + } + + cmdQ->finish(); + + const auto &csr = cmdQ->getGpgpuCommandStreamReceiver(); + EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u); + EXPECT_EQ(*csr.getTagAddress(), 2u); +} + +HWTEST_F(TimestampPacketTests, givenOOQAndEnableTimestampWaitWhenFinishThenWaitOnTimestampAndUpdateTagFromCpu) { + DebugManagerStateRestore restorer; + DebugManager.flags.UpdateTaskCountFromWait.set(3); + DebugManager.flags.EnableTimestampWait.set(1); + + device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; + cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 0}; + auto cmdQ = std::make_unique>(context, device.get(), 
props); + + TimestampPacketContainer *deferredTimestampPackets = cmdQ->deferredTimestampPackets.get(); + TimestampPacketContainer *timestampPacketContainer = cmdQ->timestampPacketContainer.get(); + + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + cmdQ->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + cmdQ->flush(); + + EXPECT_EQ(1u, deferredTimestampPackets->peekNodes().size()); + EXPECT_EQ(1u, timestampPacketContainer->peekNodes().size()); + + typename FamilyType::TimestampPacketType timestampData[] = {2, 2, 2, 2}; + for (uint32_t i = 0; i < deferredTimestampPackets->peekNodes()[0]->getPacketsUsed(); i++) { + deferredTimestampPackets->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); + timestampPacketContainer->peekNodes()[0]->assignDataToAllTimestamps(i, timestampData); + } + + cmdQ->finish(); + + const auto &csr = cmdQ->getGpgpuCommandStreamReceiver(); + EXPECT_EQ(csr.peekLatestFlushedTaskCount(), 2u); + EXPECT_EQ(*csr.getTagAddress(), 2u); + + cmdQ.reset(); +} + HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingToOoqThenMoveToDeferredList) { device->getUltCommandStreamReceiver().timestampPacketWriteEnabled = true; diff --git a/opencl/test/unit_test/test_files/igdrcl.config b/opencl/test/unit_test/test_files/igdrcl.config index 74b284b6e0..a1116cee3e 100644 --- a/opencl/test/unit_test/test_files/igdrcl.config +++ b/opencl/test/unit_test/test_files/igdrcl.config @@ -234,6 +234,7 @@ PrintIoctlTimes = 0 PrintIoctlEntries = 0 PrintUmdSharedMigration = 0 UpdateTaskCountFromWait = -1 +EnableTimestampWait = -1 PreferCopyEngineForCopyBufferToBuffer = -1 EnableStaticPartitioning = -1 DisableDeepBind = 0 diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 144e62c130..289f75131e 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ 
b/shared/source/command_stream/command_stream_receiver.cpp @@ -669,6 +669,16 @@ bool CommandStreamReceiver::createAllocationForHostSurface(HostPtrSurface &surfa return true; } +void CommandStreamReceiver::updateTagFromCpu(uint32_t taskCount) { + this->latestFlushedTaskCount.store(taskCount); + + auto partitionAddress = getTagAddress(); + for (uint32_t i = 0; i < activePartitions; i++) { + *partitionAddress = taskCount; + partitionAddress = ptrOffset(partitionAddress, CommonConstants::partitionAddressOffset); + } +} + TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() { if (profilingTimeStampAllocator.get() == nullptr) { std::vector rootDeviceIndices = {rootDeviceIndex}; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 02982e5a47..90ca884fab 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -225,6 +225,8 @@ class CommandStreamReceiver { virtual void updateTagFromWait() = 0; virtual bool isUpdateTagFromWaitEnabled() = 0; + void updateTagFromCpu(uint32_t taskCount); + ScratchSpaceController *getScratchSpaceController() const { return scratchSpaceController.get(); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f0dfe189f4..f77f46a35d 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -210,6 +210,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( updateTag = !isUpdateTagFromWaitEnabled(); updateTag |= dispatchFlags.blocking; + updateTag |= dispatchFlags.dcFlush; if (updateTag) { PipeControlArgs args(dispatchFlags.dcFlush); @@ -1310,8 +1311,19 @@ template inline bool CommandStreamReceiverHw::isUpdateTagFromWaitEnabled() { bool enabled = false; - if 
(DebugManager.flags.UpdateTaskCountFromWait.get() != -1) { - enabled = DebugManager.flags.UpdateTaskCountFromWait.get(); + switch (DebugManager.flags.UpdateTaskCountFromWait.get()) { + case 0: + enabled = false; + break; + case 1: + enabled = this->isDirectSubmissionEnabled(); + break; + case 2: + enabled = this->isAnyDirectSubmissionEnabled(); + break; + case 3: + enabled = true; + break; } return enabled; diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 3df950bee7..44fb321839 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -348,7 +348,8 @@ DECLARE_DEBUG_VARIABLE(int64_t, DisableIndirectAccess, -1, "0: default, 0: Use DECLARE_DEBUG_VARIABLE(int32_t, UseVmBind, -1, "Use new residency model on Linux (requires kernel support), -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, PassBoundBOToExec, -1, "Pass bound BOs to exec call to keep dependencies") DECLARE_DEBUG_VARIABLE(int32_t, EnableStaticPartitioning, -1, "Divide workload into partitions during dispatch, -1: default, 0: disabled, 1: enabled") -DECLARE_DEBUG_VARIABLE(int32_t, UpdateTaskCountFromWait, -1, " Do not update task count after each enqueue, but send update request while wait, -1: default(disabled), 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, UpdateTaskCountFromWait, -1, " Do not update task count after each enqueue, but send update request while wait, -1: default(disabled), 0: disabled, 1: enabled on gpgpue engine with direct submission, 2: enabled on any direct submission, 3: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampWait, -1, "Wait using timestamps, -1: default(disabled), 0: disabled, 1: enabled where UpdateTaskCountFromWait enabled, 2: enabled on gpgpue engine with direct submission, 3: enabled on any direct submission, 4: enabled") DECLARE_DEBUG_VARIABLE(int32_t, 
DeferOsContextInitialization, -1, "-1: default, 0: create all contexts immediately, 1: defer, if possible") DECLARE_DEBUG_VARIABLE(int32_t, UsmInitialPlacement, -1, "-1: default, 0: optimize for first CPU access, 1: optimize for first GPU access") DECLARE_DEBUG_VARIABLE(int32_t, ForceHostPointerImport, -1, "-1: default, 0: disable, 1: enable, Forces the driver to import every host pointer coming into driver, WARNING this is not spec complaint.") diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 7566161834..6d20ad2f50 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -278,7 +278,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCsrWhenSubmitiingBatchBufferThenTaskC HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenSubmitiingBatchBufferThenTaskCountIsIncrementedAndLatestsValuesSetCorrectly) { DebugManagerStateRestore restorer; - DebugManager.flags.UpdateTaskCountFromWait.set(1); + DebugManager.flags.UpdateTaskCountFromWait.set(3); MockCsrHw csr(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); @@ -361,6 +361,31 @@ HWTEST_F(CommandStreamReceiverTest, givenNoDirectSubmissionWhenCheckTaskCountFro EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); } +HWTEST_F(CommandStreamReceiverTest, givenUpdateTaskCountFromWaitWhenCheckTaskCountFromWaitEnabledThenProperValueReturned) { + DebugManagerStateRestore restorer; + auto &csr = pDevice->getUltCommandStreamReceiver(); + + { + DebugManager.flags.UpdateTaskCountFromWait.set(0); + EXPECT_FALSE(csr.isUpdateTagFromWaitEnabled()); + } + + { + DebugManager.flags.UpdateTaskCountFromWait.set(1); + EXPECT_EQ(csr.isUpdateTagFromWaitEnabled(), csr.isDirectSubmissionEnabled()); + } + + { + DebugManager.flags.UpdateTaskCountFromWait.set(2); + 
EXPECT_EQ(csr.isUpdateTagFromWaitEnabled(), csr.isAnyDirectSubmissionEnabled()); + } + + { + DebugManager.flags.UpdateTaskCountFromWait.set(3); + EXPECT_TRUE(csr.isUpdateTagFromWaitEnabled()); + } +} + struct InitDirectSubmissionFixture { void SetUp() { DebugManager.flags.EnableDirectSubmission.set(1);