Update dispatch flags for enqueueCommandWithoutKernel

Change-Id: I51c8afee84b40db12901f048c03fd92fa3808c0c
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:
Dunajski, Bartosz
2019-07-19 14:31:12 +02:00
committed by sys_ocldev
parent 7e404e79ff
commit 4e98d34471
7 changed files with 64 additions and 13 deletions

View File

@ -859,6 +859,9 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
dispatchFlags.multiEngineQueue = multiEngineQueue;
dispatchFlags.preemptionMode = device->getPreemptionMode();
dispatchFlags.implicitFlush = blitEnqueue;
dispatchFlags.guardCommandBufferWithPipeControl = true;
dispatchFlags.outOfOrderExecutionAllowed = getGpgpuCommandStreamReceiver().isNTo1SubmissionModelEnabled();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
dispatchFlags.csrDependencies.fillFromEventsRequestAndMakeResident(eventsRequest, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
}

View File

@ -192,7 +192,7 @@ HWTEST_F(BarrierTest, eventWithWaitDependenciesShouldSync) {
// in this case only cmdQ raises the taskLevel while csr stays intact
EXPECT_EQ(8u, pCmdQ->taskLevel);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(8u, commandStreamReceiver.peekTaskLevel());
EXPECT_EQ(pCmdQ->taskLevel + 1, commandStreamReceiver.peekTaskLevel());
} else {
EXPECT_EQ(7u, commandStreamReceiver.peekTaskLevel());
}

View File

@ -64,12 +64,13 @@ HWTEST_F(EnqueueHandlerTest, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking);
EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
EXPECT_EQ(mockCmdQ->isMultiEngineQueue(), mockCsr->passedDispatchFlags.multiEngineQueue);
EXPECT_EQ(pDevice->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
mockCmdQ->gpgpuEngine->commandStreamReceiver = oldCsr;
}
HWTEST_F(EnqueueHandlerTest, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitflush) {
HWTEST_F(EnqueueHandlerTest, givenBlitEnqueueWhenDispatchingCommandsWithoutKernelThenDoImplicitFlush) {
auto executionEnvironment = pDevice->getExecutionEnvironment();
auto mockCsr = std::make_unique<MockCsrHw2<FamilyType>>(*executionEnvironment);
auto mockCmdQ = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);
@ -86,16 +87,47 @@ HWTEST_F(EnqueueHandlerTest, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, true, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
mockCmdQ->gpgpuEngine->commandStreamReceiver = oldCsr;
}
HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenUsedSizeEqualZero) {
// Verifies that the dispatch flags handed to the CSR by enqueueCommandWithoutKernel
// carry outOfOrderExecutionAllowed equal to the CSR's nTo1SubmissionModelEnabled
// setting, first with the setting disabled and then with it enabled.
HWTEST_F(EnqueueHandlerTest, givenN1EnabledWhenDispatchingWithoutKernelTheAllowOutOfOrderExecution) {
    auto execEnv = pDevice->getExecutionEnvironment();
    auto csrMock = std::make_unique<MockCsrHw2<FamilyType>>(*execEnv);
    auto queueMock = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pDevice, nullptr);

    csrMock->setupContext(*queueMock->gpgpuEngine->osContext);
    csrMock->initializeTagAllocation();

    // Route the queue through the mock CSR; the original pointer is restored at the end.
    auto originalCsr = queueMock->gpgpuEngine->commandStreamReceiver;
    queueMock->gpgpuEngine->commandStreamReceiver = csrMock.get();
    csrMock->createPreemptionAllocation();

    TimestampPacketContainer previousNodes;
    EventsRequest noEvents(0, nullptr, nullptr);
    EventBuilder builder;
    bool isBlocked = false;

    // Same kernel-less enqueue is issued for both settings of the N:1 switch.
    auto dispatchWithoutKernel = [&]() {
        queueMock->enqueueCommandWithoutKernel(nullptr, 0, queueMock->getCS(0), 0, isBlocked, true, &previousNodes, noEvents, builder, 0);
    };

    csrMock->nTo1SubmissionModelEnabled = false;
    dispatchWithoutKernel();
    EXPECT_FALSE(csrMock->passedDispatchFlags.outOfOrderExecutionAllowed);

    csrMock->nTo1SubmissionModelEnabled = true;
    dispatchWithoutKernel();
    EXPECT_TRUE(csrMock->passedDispatchFlags.outOfOrderExecutionAllowed);

    queueMock->gpgpuEngine->commandStreamReceiver = originalCsr;
}
// Enqueues a CL_COMMAND_MARKER through a mock queue that reports a required
// cache flush, then inspects how many bytes of command stream 0 were used.
HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenEnqueuedHandlerThenProgramPipeControl) {
std::unique_ptr<MockCommandQueueWithCacheFlush<FamilyType>> mockCmdQ(new MockCommandQueueWithCacheFlush<FamilyType>(context, pDevice, 0));
// Make the mock queue report that the command requires a cache flush.
mockCmdQ->commandRequireCacheFlush = true;
// Marker enqueue with no kernel, no surfaces and no events.
mockCmdQ->template enqueueHandler<CL_COMMAND_MARKER>(nullptr, 0, false, nullptr, 0, nullptr, nullptr);
// NOTE(review): this zero-size expectation and the size expectation further down
// look like the old and new side of the same diff hunk; both can hold only if
// requiredCmdStreamSize is 0 -- confirm which one belongs in the final test.
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), 0u);
// Expected usage: one pipe control with post-sync operation, rounded up to a cache line.
auto requiredCmdStreamSize = alignUp(PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation(),
MemoryConstants::cacheLineSize);
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize);
}
HWTEST_F(EnqueueHandlerTest, givenTimestampPacketWriteEnabledAndCommandWithCacheFlushWhenEnqueueingHandlerThenObtainNewStamp) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@ -222,7 +222,7 @@ TEST_F(EventTests, eventPassedToEnqueueMarkerHasTheSameLevelAsPreviousCommand) {
ASSERT_EQ(CL_SUCCESS, retVal);
if (csr.peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(csr.peekTaskLevel(), pEvent2->taskLevel);
EXPECT_EQ(csr.peekTaskLevel(), pCmdQ->taskLevel + 1);
} else {
EXPECT_EQ(csr.peekTaskLevel(), pEvent->taskLevel + 1);
}

View File

@ -138,12 +138,13 @@ HWTEST_F(MarkerTest, returnedEventShouldHaveEqualDepthToLastCommandPacketInComma
HWTEST_F(MarkerTest, eventWithWaitDependenciesShouldSync) {
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
uint32_t initialTaskLevel = 7;
// In N:1, CSR is always highest task level.
commandStreamReceiver.taskLevel = 7;
commandStreamReceiver.taskLevel = initialTaskLevel;
// In N:1, pCmdQ.level <= CSR.level
pCmdQ->taskLevel = 7;
pCmdQ->taskLevel = initialTaskLevel;
// In N:1, event.level <= pCmdQ.level
Event event1(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 5, 15);
@ -165,7 +166,12 @@ HWTEST_F(MarkerTest, eventWithWaitDependenciesShouldSync) {
std::unique_ptr<Event> pEvent((Event *)(event));
// Should sync CSR & CmdQ levels.
EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel);
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
EXPECT_EQ(initialTaskLevel, pCmdQ->taskLevel);
EXPECT_EQ(initialTaskLevel + 1, commandStreamReceiver.peekTaskLevel());
} else {
EXPECT_EQ(commandStreamReceiver.peekTaskLevel(), pCmdQ->taskLevel);
}
EXPECT_EQ(pCmdQ->taskLevel, pEvent->taskLevel);
EXPECT_EQ(7u, pEvent->taskLevel);
}

View File

@ -71,12 +71,17 @@ HWTEST_F(GetSizeRequiredTest, enqueueMarker) {
&eventReturned);
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(0u, commandStream.getUsed() - usedBeforeCS);
size_t expectedStreamSize = 0;
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedStreamSize = alignUp(PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation(),
+MemoryConstants::cacheLineSize);
}
EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS);
EXPECT_EQ(0u, dsh->getUsed() - usedBeforeDSH);
EXPECT_EQ(0u, ioh->getUsed() - usedBeforeIOH);
EXPECT_EQ(0u, ssh->getUsed() - usedBeforeSSH);
delete (Event *)eventReturned;
clReleaseEvent(eventReturned);
}
HWTEST_F(GetSizeRequiredTest, enqueueBarrierDoesntConsumeAnySpace) {
@ -92,9 +97,13 @@ HWTEST_F(GetSizeRequiredTest, enqueueBarrierDoesntConsumeAnySpace) {
&eventReturned);
EXPECT_EQ(CL_SUCCESS, retVal);
size_t expectedSize = 0;
size_t expectedStreamSize = 0;
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
expectedStreamSize = alignUp(PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation(),
+MemoryConstants::cacheLineSize);
}
EXPECT_EQ(expectedSize, commandStream.getUsed() - usedBeforeCS);
EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS);
delete (Event *)eventReturned;
clReleaseEvent(eventReturned);
}

View File

@ -162,6 +162,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::lastSentCoherencyRequest;
using CommandStreamReceiver::mediaVfeStateDirty;
using CommandStreamReceiver::nTo1SubmissionModelEnabled;
using CommandStreamReceiver::taskCount;
using CommandStreamReceiver::taskLevel;
using CommandStreamReceiver::timestampPacketWriteEnabled;