Fix dependency resolution when marker with profiling is programmed.

We need to make sure it properly follows dependencies.
Before the change, code was not programming semaphores.

Related-to: NEO-6444

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2022-01-18 10:43:38 +00:00
committed by Compute-Runtime-Automation
parent 40483acd17
commit dd63f96836
2 changed files with 31 additions and 0 deletions

View File

@@ -236,6 +236,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
} }
} }
if (isMarkerWithProfiling) {
flushDependenciesForNonKernelCommand = true;
}
if (flushDependenciesForNonKernelCommand) { if (flushDependenciesForNonKernelCommand) {
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps); TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps);
} }

View File

@@ -1541,6 +1541,33 @@ HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenEventFromCpuCopyWhenWaitingFo
clReleaseEvent(outEvent2); clReleaseEvent(outEvent2);
} }
HWTEST_TEMPLATED_F(BlitEnqueueTaskCountTests, givenMarkerThatFollowsCopyOperationWhenItIsWaitedItHasProperDependencies) {
auto buffer = createBuffer(1, false);
int hostPtr = 0;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto mockCmdQueue = static_cast<MockCommandQueueHw<FamilyType> *>(commandQueue.get());
mockCmdQueue->commandQueueProperties |= CL_QUEUE_PROFILING_ENABLE;
cl_event outEvent1;
commandQueue->enqueueKernel(mockKernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
commandQueue->enqueueWriteBuffer(buffer.get(), false, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr);
auto offset = mockCmdQueue->getCS(0).getUsed();
//marker needs to program semaphore
commandQueue->enqueueMarkerWithWaitList(0, nullptr, &outEvent1);
auto cmdListQueue = getCmdList<FamilyType>(mockCmdQueue->getCS(0), offset);
expectCommand<MI_SEMAPHORE_WAIT>(cmdListQueue.begin(), cmdListQueue.end());
clWaitForEvents(1, &outEvent1);
clReleaseEvent(outEvent1);
}
using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>; using BlitEnqueueWithDisabledGpgpuSubmissionTests = BlitEnqueueTests<1>;
HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) { HWTEST_TEMPLATED_F(BlitEnqueueWithDisabledGpgpuSubmissionTests, givenCacheFlushRequiredWhenDoingBcsCopyThenSubmitToGpgpuOnlyIfPreviousEnqueueWasGpgpu) {