diff --git a/opencl/source/command_queue/command_queue_hw_base.inl b/opencl/source/command_queue/command_queue_hw_base.inl index 5386b0e646..16d5082fd4 100644 --- a/opencl/source/command_queue/command_queue_hw_base.inl +++ b/opencl/source/command_queue/command_queue_hw_base.inl @@ -210,7 +210,7 @@ bool CommandQueueHw::isGpgpuSubmissionForBcsRequired(bool queueBlocked, bool required = (latestSentEnqueueType != EnqueueProperties::Operation::Blit) && (latestSentEnqueueType != EnqueueProperties::Operation::None) && - (isCacheFlushForBcsRequired() || !getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed()); + /* When the GPGPU CSR reports DispatchMode::ImmediateDispatch, an unflushed latest task count no longer makes this term true; only isCacheFlushForBcsRequired() can. */ (isCacheFlushForBcsRequired() || !(getGpgpuCommandStreamReceiver().getDispatchMode() == DispatchMode::ImmediateDispatch || getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed())); if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) { required = true; diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 622a90b420..e41a1af3bc 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -1802,6 +1802,44 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushN EXPECT_EQ(2u, gpgpuCsr->peekTaskCount()); } +/* Scenario: ImmediateDispatch is requested through overrideDispatchPolicy() and the cache-flush-for-BCS override returns false. Each enqueueWriteBuffer is expected to be recorded as a Blit operation and to leave the GPGPU CSR task count untouched (0 before the kernel, 1 after it). */ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenImmediateDispatchCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) { + auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get()); + EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); + + DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1); + DebugManager.flags.PerformImplicitFlushForNewResource.set(0); + DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0); + + mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true; + mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = 
false; + mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::ImmediateDispatch); + mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup(); + + auto buffer = createBuffer(1, false); + buffer->forceDisallowCPUCopy = true; + int hostPtr = 0; + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(0u, gpgpuCsr->peekTaskCount()); + + /* The kernel enqueue is the only step expected to raise the GPGPU task count (0 -> 1). */ commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); + + commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType); + EXPECT_EQ(1u, gpgpuCsr->peekTaskCount()); +} + HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredWhenDoingBcsCopyAfterBarrierThenSubmitToGpgpu) { auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get()); EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType); diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 46bc7b05cb..d0286c9fc3 100644 --- 
a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -331,6 +331,9 @@ class CommandStreamReceiver { } uint32_t getCompletionValue(const GraphicsAllocation &gfxAllocation); + /** Read-only accessor: returns the value held in this receiver's dispatchMode member. */ DispatchMode getDispatchMode() const { + return this->dispatchMode; + } protected: void cleanupResources();