From 8e94d568a8dbbd218e2e62f89d6cdeb8e4f2ee76 Mon Sep 17 00:00:00 2001
From: Michal Mrozek
Date: Wed, 2 Mar 2022 12:43:02 +0000
Subject: [PATCH] Do not set dcFlush in Immediate dispatch mode.

Some devices do not need dcFlush.
Setting it prevents further optimization of pipe controls which are not
needed.

Signed-off-by: Michal Mrozek
---
Review notes (text between "---" and the first "diff --git" is not part of
the commit message):
- One-record-per-line structure and code indentation were rebuilt from a
  whitespace-collapsed copy. Context-line whitespace is a best effort; use
  `git apply --ignore-whitespace` if a hunk is rejected.
- Template arguments (<FamilyType>, <PIPE_CONTROL *>, <GfxFamily>) were
  missing from the collapsed copy and are reconstructed here - assumption,
  confirm against the upstream commit.
- Author / sign-off e-mail addresses were absent from the copy and have
  deliberately not been invented; restore them before using `git am`.
- The new test uses ASSERT_NE (instead of EXPECT_NE) ahead of the itorPC
  dereference, so the post-image no longer matches the blob id recorded in
  the "index" line of the first diff.

 ...and_stream_receiver_flush_task_2_tests.cpp | 44 +++++++++++++++++++
 .../command_stream_receiver_hw_base.inl       |  9 ++--
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
index e1e8ed4ca8..d5bdea18ea 100644
--- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
+++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp
@@ -80,6 +80,50 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFl
     buffer->release();
 }
 
+HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnableUpdateTaskFromWaitWhenNonBlockingCallIsMadeThenNoPipeControlInsertedOnDevicesWithoutDCFlushRequirements) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.UpdateTaskCountFromWait.set(3u);
+    typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL;
+    MockContext ctx(pClDevice);
+    auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
+    commandStreamReceiver.timestampPacketWriteEnabled = false;
+    CommandQueueHw<FamilyType> commandQueue(&ctx, pClDevice, 0, false);
+    size_t tempBuffer[] = {0, 1, 2};
+    size_t dstBuffer[] = {0, 1, 2};
+    cl_int retVal = 0;
+
+    auto buffer = Buffer::create(&ctx, CL_MEM_USE_HOST_PTR, sizeof(tempBuffer), tempBuffer, retVal);
+
+    commandQueue.enqueueWriteBuffer(buffer, CL_FALSE, 0, sizeof(tempBuffer), dstBuffer, nullptr, 0u, nullptr, 0);
+
+    auto &commandStreamTask = *commandStreamReceiver.lastFlushedCommandStream;
+
+    cmdList.clear();
+    // Parse command list
+    parseCommands<FamilyType>(commandStreamTask, 0);
+
+    auto pipeControlExpected = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo);
+
+    auto itorPC = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
+
+    if (pipeControlExpected) {
+        ASSERT_NE(cmdList.end(), itorPC); // fatal: itorPC is advanced/dereferenced below
+        if (UnitTestHelper<FamilyType>::isPipeControlWArequired(pDevice->getHardwareInfo())) {
+            itorPC++;
+            itorPC = find<PIPE_CONTROL *>(itorPC, cmdList.end());
+            ASSERT_NE(cmdList.end(), itorPC); // fatal: itorPC is dereferenced below
+        }
+
+        // Verify that the dcFlushEnabled bit is set in PC
+        auto pCmdWA = reinterpret_cast<PIPE_CONTROL *>(*itorPC);
+        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), pCmdWA->getDcFlushEnable());
+    } else {
+        EXPECT_EQ(cmdList.end(), itorPC);
+    }
+
+    buffer->release();
+}
+
 HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenTaskCsPassedAsCommandStreamParamWhenFlushingTaskThenCompletionStampIsCorrect) {
     CommandQueueHw<FamilyType> commandQueue(nullptr, pClDevice, 0, false);
     auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl
index ef2282622e..302af82324 100644
--- a/shared/source/command_stream/command_stream_receiver_hw_base.inl
+++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl
@@ -187,6 +187,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
     }
 
     const auto &hwInfo = peekHwInfo();
+    auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
+
     bool updateTag = false;
     if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) {
         if (this->dispatchMode == DispatchMode::ImmediateDispatch) {
@@ -195,7 +197,10 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
             levelClosed = true;
             //if we guard with ppc, flush dc as well to speed up completion latency
             if (dispatchFlags.guardCommandBufferWithPipeControl) {
-                dispatchFlags.dcFlush = true;
+                const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
+                if (hwInfoConfig.isDcFlushAllowed()) {
+                    dispatchFlags.dcFlush = true;
+                }
             }
         }
 
@@ -255,8 +260,6 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
         dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode,
         hwInfo);
 
-    auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily);
-
     if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) {
         if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.value != -1) {
             // Reuse previous programming