Synchronize blit enqueue after barrier call

Change-Id: I0349dc5b1581ecb142bdab881877450366bcdb86
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-3020
This commit is contained in:
Dunajski, Bartosz
2019-10-03 14:38:49 +02:00
committed by sys_ocldev
parent 0f122fbcee
commit d3e583f7f4
12 changed files with 187 additions and 24 deletions

View File

@@ -44,9 +44,11 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelWhenCommandEnqueuedT
Surface *surfaces[] = {surface.get()};
auto blocking = true;
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(surfaces, 1, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes,
barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_EQ(allocation->getTaskCount(mockCmdQ->getGpgpuCommandStreamReceiver().getOsContext().getContextId()), 1u);
}
@@ -60,6 +62,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
auto blockedCommandsDataForDependencyFlush = new KernelOperation(commandStream, *csr.getInternalAllocationStorage());
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
MultiDispatchInfo multiDispatchInfo;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
@@ -69,7 +72,7 @@ HWTEST_F(EnqueueHandlerTest, givenNonBlitPropertyWhenEnqueueIsBlockedThenDontReg
auto blockedCommandsData = std::unique_ptr<KernelOperation>(blockedCommandsDataForDependencyFlush);
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_MARKER, surfaces, size_t(0), multiDispatchInfo, &previousTimestampPacketNodes,
blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
barrierTimestampPacketNodes, blockedCommandsData, enqueuePropertiesForDependencyFlush, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr));
EXPECT_FALSE(blockedCommandsDataForDependencyFlush->blitEnqueue);
}
@@ -84,6 +87,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
auto blockedCommandsDataForBlitEnqueue = new KernelOperation(commandStream, *csr.getInternalAllocationStorage());
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
MultiDispatchInfo multiDispatchInfo;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
@@ -96,7 +100,7 @@ HWTEST_F(EnqueueHandlerTest, givenBlitPropertyWhenEnqueueIsBlockedThenRegisterBl
auto blockedCommandsData = std::unique_ptr<KernelOperation>(blockedCommandsDataForBlitEnqueue);
Surface *surfaces[] = {nullptr};
mockCmdQ->enqueueBlocked(CL_COMMAND_READ_BUFFER, surfaces, size_t(0), multiDispatchInfo, &previousTimestampPacketNodes,
blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
barrierTimestampPacketNodes, blockedCommandsData, enqueuePropertiesForBlitEnqueue, eventsRequest,
eventBuilder, std::unique_ptr<PrintfHandler>(nullptr));
EXPECT_TRUE(blockedCommandsDataForBlitEnqueue->blitEnqueue);
EXPECT_EQ(blitProperties.srcAllocation, blockedCommandsDataForBlitEnqueue->blitProperties.srcAllocation);
@@ -112,11 +116,13 @@ HWTEST_F(DispatchFlagsTests, whenEnqueueCommandWithoutKernelThenPassCorrectDispa
auto blocking = true;
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
EnqueueProperties enqueueProperties(false, false, false, true, nullptr);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes,
barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_EQ(blocking, mockCsr->passedDispatchFlags.blocking);
EXPECT_FALSE(mockCsr->passedDispatchFlags.implicitFlush);
@@ -140,6 +146,7 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
auto blocking = true;
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
BuiltinOpParams builtinOpParams;
@@ -149,11 +156,12 @@ HWTEST_F(DispatchFlagsTests, givenBlitEnqueueWhenDispatchingCommandsWithoutKerne
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
mockCmdQ->getCS(0), 0, false);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, barrierTimestampPacketNodes,
eventsRequest, mockCmdQ->getCS(0), 0, false);
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocking, enqueueProperties, &previousTimestampPacketNodes,
barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.implicitFlush);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
@@ -173,6 +181,7 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO
auto buffer = std::unique_ptr<Buffer>(Buffer::create(context.get(), 0, 1, nullptr, retVal));
TimestampPacketContainer previousTimestampPacketNodes;
TimestampPacketContainer barrierTimestampPacketNodes;
EventsRequest eventsRequest(0, nullptr, nullptr);
EventBuilder eventBuilder;
@@ -184,17 +193,19 @@ HWTEST_F(DispatchFlagsTests, givenN1EnabledWhenDispatchingWithoutKernelTheAllowO
multiDispatchInfo.setBuiltinOpParams(builtinOpParams);
mockCmdQ->obtainNewTimestampPacketNodes(1, previousTimestampPacketNodes, true);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, eventsRequest,
mockCmdQ->getCS(0), 0, false);
BlitProperties blitProperties = mockCmdQ->processDispatchForBlitEnqueue(multiDispatchInfo, previousTimestampPacketNodes, barrierTimestampPacketNodes,
eventsRequest, mockCmdQ->getCS(0), 0, false);
EnqueueProperties enqueueProperties(true, false, false, false, &blitProperties);
enqueueProperties.blitProperties = &blitProperties;
mockCsr->nTo1SubmissionModelEnabled = false;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes,
barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_FALSE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
mockCsr->nTo1SubmissionModelEnabled = true;
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes, eventsRequest, eventBuilder, 0);
mockCmdQ->enqueueCommandWithoutKernel(nullptr, 0, mockCmdQ->getCS(0), 0, blocked, enqueueProperties, &previousTimestampPacketNodes,
barrierTimestampPacketNodes, eventsRequest, eventBuilder, 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.outOfOrderExecutionAllowed);
}

View File

@@ -14,6 +14,7 @@ struct DispatchFlagsHelper {
static DispatchFlags createDefaultDispatchFlags() {
return DispatchFlags(
{}, //csrDependencies
nullptr, //barrierTimestampPacketNodes
{}, //pipelineSelectArgs
nullptr, //flushStampReference
QueueThrottle::MEDIUM, //throttle
@@ -34,4 +35,4 @@ struct DispatchFlagsHelper {
false //epilogueRequired
);
}
};
};

View File

@@ -1591,6 +1591,26 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWhenEstimatingCsrStreamSiz
EXPECT_EQ(sizeWithPcRequest, extendedSize);
}
// Verifies the CSR stream size estimate when a barrier timestamp packet node is attached
// to the dispatch flags: turning on the stalling pipe control request must enlarge the
// estimate by exactly the size of a pipe control with a post-sync operation.
HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstimatingCsrStreamSizeThenAddSizeForPipeControlForWrite) {
    auto &csr = device->getUltCommandStreamReceiver<FamilyType>();

    // Default dispatch flags, extended with a single barrier node taken from the CSR's allocator.
    auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
    TimestampPacketContainer barrierNodes;
    barrierNodes.add(csr.getTimestampPacketAllocator()->getTag());
    dispatchFlags.barrierTimestampPacketNodes = &barrierNodes;

    // Size estimate for the current CSR state; evaluated once per pipe control setting.
    auto estimateCmdStreamSize = [&]() {
        return device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(dispatchFlags, *device.get());
    };

    csr.stallingPipeControlOnNextFlushRequired = false;
    auto sizeWithoutPcRequest = estimateCmdStreamSize();

    csr.stallingPipeControlOnNextFlushRequired = true;
    auto sizeWithPcRequest = estimateCmdStreamSize();

    auto pipeControlSize = PipeControlHelper<FamilyType>::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo());
    size_t extendedSize = sizeWithoutPcRequest + pipeControlSize;

    EXPECT_EQ(sizeWithPcRequest, extendedSize);
}
HWTEST_F(TimestampPacketTests, givenInstructionCacheRequesWhenSizeIsEstimatedThenPipeControlIsAdded) {
auto &csr = device->getUltCommandStreamReceiver<FamilyType>();
DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags();

View File

@@ -931,6 +931,80 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
}
// With a stalling pipe control requested on the queue's GPGPU CSR, a blit write-buffer
// enqueue must program, on the BCS CSR, exactly one MI_SEMAPHORE_WAIT whose address equals
// the address written by the write-immediate-data PIPE_CONTROL found in the GPGPU stream.
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlitEnqueueThenWaitPipeControlOnBcsEngine) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->getBcsCommandStreamReceiver());
    auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
    queueCsr->stallingPipeControlOnNextFlushRequired = true;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // NOTE(review): presumably keeps the write from being serviced by a CPU copy - confirm.
    buffer->forceDisallowCPUCopy = true;
    // Fixed, fake host pointer value used as the source of the one-byte write.
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    cmdQ->enqueueWriteBuffer(buffer.get(), true, 0, 1, hostPtr, nullptr, 0, nullptr, nullptr);

    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(queueCsr->commandStream);

    // Locate the first PIPE_CONTROL with a write-immediate-data post-sync operation in the
    // GPGPU stream and reconstruct its 64-bit write address from the low/high halves.
    uint64_t pipeControlWriteAddress = 0;
    for (auto &cmd : hwParser.cmdList) {
        if (auto pipeControlCmd = genCmdCast<PIPE_CONTROL *>(cmd)) {
            if (pipeControlCmd->getPostSyncOperation() != PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
                continue;
            }

            EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable());
            auto addressLow = static_cast<uint64_t>(pipeControlCmd->getAddress());
            auto addressHigh = static_cast<uint64_t>(pipeControlCmd->getAddressHigh());
            pipeControlWriteAddress = (addressHigh << 32) | addressLow;
            break;
        }
    }
    EXPECT_NE(0u, pipeControlWriteAddress);

    HardwareParse bcsHwParser;
    bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
    auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());

    // Fatal assertion: semaphores[0] is dereferenced right below, so the test has to stop
    // here when no (or more than one) semaphore was programmed instead of indexing blindly.
    ASSERT_EQ(1u, semaphores.size());
    EXPECT_EQ(pipeControlWriteAddress, genCmdCast<MI_SEMAPHORE_WAIT *>(*(semaphores[0]))->getSemaphoreGraphicsAddress());
}
// Same scenario as the non-blocked variant, but the write-buffer enqueue depends on a user
// event: once that event is set to CL_COMPLETE, the BCS CSR stream must contain exactly one
// MI_SEMAPHORE_WAIT.
HWTEST_TEMPLATED_F(BcsBufferTests, givenPipeControlRequestWhenDispatchingBlockedBlitEnqueueThenWaitPipeControlOnBcsEngine) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    auto bcsCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->getBcsCommandStreamReceiver());
    auto queueCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(cmdQ->gpgpuEngine->commandStreamReceiver);
    queueCsr->stallingPipeControlOnNextFlushRequired = true;

    cl_int retVal = CL_SUCCESS;
    auto buffer = clUniquePtr<Buffer>(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
    // NOTE(review): presumably keeps the write from being serviced by a CPU copy - confirm.
    buffer->forceDisallowCPUCopy = true;
    // Fixed, fake host pointer value used as the source of the one-byte write.
    void *hostPtr = reinterpret_cast<void *>(0x12340000);

    // Enqueue with a single-entry wait list holding the user event, then complete the event.
    UserEvent userEvent;
    cl_event waitlist = &userEvent;
    cmdQ->enqueueWriteBuffer(buffer.get(), false, 0, 1, hostPtr, nullptr, 1, &waitlist, nullptr);
    userEvent.setStatus(CL_COMPLETE);

    HardwareParse bcsHwParser;
    bcsHwParser.parseCommands<FamilyType>(bcsCsr->commandStream);
    auto semaphores = findAll<MI_SEMAPHORE_WAIT *>(bcsHwParser.cmdList.begin(), bcsHwParser.cmdList.end());
    EXPECT_EQ(1u, semaphores.size());

    // Result intentionally ignored.
    // NOTE(review): presumably called for its side effect of refreshing the queue's blocked
    // state before teardown - confirm.
    cmdQ->isQueueBlocked();
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenReadOrWriteBufferOperationWithoutKernelWhenEstimatingCommandsSizeThenReturnCorrectValue) {
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
CsrDependencies csrDependencies;