While enqueue blit do not flush gpgpu if already flushed

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2022-03-03 09:56:18 +00:00 committed by Compute-Runtime-Automation
parent f193efec2f
commit 999c6424a4
4 changed files with 76 additions and 2 deletions

View File

@ -196,7 +196,9 @@ bool CommandQueueHw<Family>::isGpgpuSubmissionForBcsRequired(bool queueBlocked,
return true;
}
bool required = (latestSentEnqueueType != EnqueueProperties::Operation::Blit) && (latestSentEnqueueType != EnqueueProperties::Operation::None);
bool required = (latestSentEnqueueType != EnqueueProperties::Operation::Blit) &&
(latestSentEnqueueType != EnqueueProperties::Operation::None) &&
(isCacheFlushForBcsRequired() || !getGpgpuCommandStreamReceiver().isLatestTaskCountFlushed());
if (DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.get() == 1) {
required = true;

View File

@ -1750,6 +1750,44 @@ HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushN
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushNotRequiredAndEnqueueNotFlushedWhenDoingBcsCopyThenSubmitOnlyOnceAfterEnqueue) {
auto mockCommandQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
EXPECT_EQ(EnqueueProperties::Operation::None, mockCommandQueue->latestSentEnqueueType);
DebugManager.flags.ForceGpgpuSubmissionForBcsEnqueue.set(-1);
DebugManager.flags.PerformImplicitFlushForNewResource.set(0);
DebugManager.flags.PerformImplicitFlushForIdleGpu.set(0);
mockCommandQueue->overrideIsCacheFlushForBcsRequired.enabled = true;
mockCommandQueue->overrideIsCacheFlushForBcsRequired.returnValue = false;
mockCommandQueue->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
mockCommandQueue->getGpgpuCommandStreamReceiver().postInitFlagsSetup();
auto buffer = createBuffer(1, false);
buffer->forceDisallowCPUCopy = true;
int hostPtr = 0;
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(0u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::GpuKernel, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(1u, gpgpuCsr->peekTaskCount());
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(EnqueueProperties::Operation::Blit, mockCommandQueue->latestSentEnqueueType);
EXPECT_EQ(2u, gpgpuCsr->peekTaskCount());

View File

@ -1550,10 +1550,11 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsTh
EXPECT_EQ(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
}
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelThenGpgpuCommandStreamIsObtained) {
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterNotFlushedKernelThenGpgpuCommandStreamIsObtained) {
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
pCmdQ->getGpgpuCommandStreamReceiver().overrideDispatchPolicy(DispatchMode::BatchedDispatch);
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
size_t offset = 0;
@ -1576,6 +1577,35 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterKernelTh
EXPECT_NE(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
}
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingBlitAfterFlushedKernelThenGpgpuCommandStreamIsNotObtained) {
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
auto dstBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
DebugManagerStateRestore restorer;
DebugManager.flags.ForceCacheFlushForBcs.set(0);
MockKernelWithInternals mockKernelWithInternals(*pClDevice);
size_t offset = 0;
size_t size = 1;
cl_int retVal = pCmdQ->enqueueKernel(mockKernelWithInternals.mockKernel, 1, &offset, &size, &size, 0, nullptr, nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_NE(0, gpgpuCsr.ensureCommandBufferAllocationCalled);
const auto ensureCommandBufferAllocationCalledAfterKernel = gpgpuCsr.ensureCommandBufferAllocationCalled;
retVal = pCmdQ->enqueueCopyBuffer(
srcBuffer.get(),
dstBuffer.get(),
0,
0,
1,
0,
nullptr,
nullptr);
ASSERT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(ensureCommandBufferAllocationCalledAfterKernel, gpgpuCsr.ensureCommandBufferAllocationCalled);
}
HWTEST_F(OoqCommandQueueHwBlitTest, givenBlitAfterBarrierWhenEnqueueingCommandThenWaitForBarrierOnBlit) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

View File

@ -192,6 +192,10 @@ class CommandStreamReceiver {
bool peekTimestampPacketWriteEnabled() const { return timestampPacketWriteEnabled; }
bool isLatestTaskCountFlushed() {
return this->peekLatestFlushedTaskCount() == this->peekTaskCount();
}
size_t defaultSshSize = 0u;
bool canUse4GbHeaps = true;