performance(ocl): program barrier pc in taskStream

Program the barrier into the task stream, ahead of the next enqueued kernel.
This reduces the number of batch buffer starts for sequences of the form
enqueue, barrier, enqueue, and so on.

Related-To: NEO-8147

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-09-12 14:17:52 +00:00
committed by Compute-Runtime-Automation
parent e08d46085b
commit 1b7e178b25
23 changed files with 224 additions and 98 deletions

View File

@@ -811,7 +811,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnq
}
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWithEmptyWaitlistWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
HWTEST_TEMPLATED_F(BcsBufferTests, givenStallingCommandsOnNextFlushWhenReleasingMultipleBlockedEnqueuesThenProgramBarrierOnce) {
DebugManager.flags.OptimizeIoqBarriersHandling.set(0);
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@@ -827,7 +827,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWithEmptyWaitlistWhenReleasingMul
cl_event waitlist0[] = {&userEvent0};
cl_event waitlist1[] = {&userEvent1};
cmdQ->enqueueBarrierWithWaitList(0, nullptr, nullptr);
cmdQ->setStallingCommandsOnNextFlush(true);
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist0, nullptr);
cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, waitlist1, nullptr);