feat(ocl): move dependencies programming

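Move programming of out-of-CSR dependencies from the CSR stream to the queue stream.
Correct the fillCsrDependenciesForTimestampPacketContainer method so that, for BCS events, the dependent CSR is the BCS command stream receiver.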

Related-To: NEO-7321

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-02-17 15:36:35 +00:00
committed by Compute-Runtime-Automation
parent 2744501176
commit 22d7c2ea28
10 changed files with 142 additions and 93 deletions

View File

@@ -187,11 +187,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = this->peekLatestSentEnqueueOperation() == EnqueueProperties::Operation::GpuKernel && canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = this->peekLatestSentEnqueueOperation() == EnqueueProperties::Operation::GpuKernel &&
productHelper.isResolveDependenciesByPipeControlsSupported(hwInfo, this->isOOQEnabled()); productHelper.isResolveDependenciesByPipeControlsSupported(hwInfo, this->isOOQEnabled());
if (false == clearDependenciesForSubCapture && if (false == clearDependenciesForSubCapture) {
false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) { if (false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr); eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr);
}
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
} }
auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator(); auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator();
size_t nodesCount = 0u; size_t nodesCount = 0u;
@@ -859,7 +860,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) { if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) { if (isHandlingBarrier) {
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
} }
@@ -1107,7 +1107,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) { if (isHandlingBarrier) {
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
} }

View File

@@ -238,6 +238,14 @@ TaskCountType Event::peekBcsTaskCountFromCommandQueue() {
} }
} }
// Reports whether this event carries usable BCS state: returns true only when
// bcsState is valid and its recorded task count is greater than zero.
bool Event::isBcsEvent() const {
return bcsState.isValid() && bcsState.taskCount > 0;
}
// Returns the engine type stored in bcsState. No validity check is performed
// here, so the value is only meaningful when isBcsEvent() reports true.
aub_stream::EngineType Event::getBcsEngineType() const {
return bcsState.engineType;
}
TaskCountType Event::getCompletionStamp() const { TaskCountType Event::getCompletionStamp() const {
return this->taskCount; return this->taskCount;
} }

View File

@@ -92,6 +92,8 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
void setupBcs(aub_stream::EngineType bcsEngineType); void setupBcs(aub_stream::EngineType bcsEngineType);
TaskCountType peekBcsTaskCountFromCommandQueue(); TaskCountType peekBcsTaskCountFromCommandQueue();
bool isBcsEvent() const;
aub_stream::EngineType getBcsEngineType() const;
TaskCountType getCompletionStamp() const; TaskCountType getCompletionStamp() const;
void updateCompletionStamp(TaskCountType taskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp); void updateCompletionStamp(TaskCountType taskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp);

View File

@@ -39,16 +39,21 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
continue; continue;
} }
auto sameRootDevice = event->getCommandQueue()->getClDevice().getRootDeviceIndex() == currentCsr.getRootDeviceIndex(); const auto sameRootDevice = event->getCommandQueue()->getClDevice().getRootDeviceIndex() == currentCsr.getRootDeviceIndex();
if (!sameRootDevice) { if (!sameRootDevice) {
continue; continue;
} }
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver(); CommandStreamReceiver *dependentCsr;
auto sameCsr = (&dependentCsr == &currentCsr); if (event->isBcsEvent()) {
bool pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) || dependentCsr = event->getCommandQueue()->getBcsCommandStreamReceiver(event->getBcsEngineType());
(CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) || } else {
(CsrDependencies::DependenciesType::All == depsType); dependentCsr = &event->getCommandQueue()->getGpgpuCommandStreamReceiver();
}
const auto sameCsr = (dependentCsr == &currentCsr);
const auto pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) ||
(CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) ||
(CsrDependencies::DependenciesType::All == depsType);
if (pushDependency) { if (pushDependency) {
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer); csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
@@ -56,9 +61,9 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
if (!sameCsr) { if (!sameCsr) {
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper(); const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
if (productHelper.isDcFlushAllowed()) { if (productHelper.isDcFlushAllowed()) {
if (!dependentCsr.isLatestTaskCountFlushed()) { if (!dependentCsr->isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps); flushDependentCsr(*dependentCsr, csrDeps);
currentCsr.makeResident(*dependentCsr.getTagAllocation()); currentCsr.makeResident(*dependentCsr->getTagAllocation());
} }
} }
} }

View File

@@ -227,7 +227,6 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (timestampPacketDependencies) { if (timestampPacketDependencies) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) { if (isHandlingBarrier) {
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
} }
@@ -403,7 +402,6 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired(); const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) { if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) { if (isHandlingBarrier) {
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies); commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
} }

View File

@@ -458,11 +458,13 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingSingleDeviceDependenciesForGpgpuCsrThenNoSemaphoreWaitIsProgrammed) { HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingSingleDeviceDependenciesForGpgpuCsrThenNoSemaphoreWaitIsProgrammed) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
DebugManagerStateRestore restorer;
DebugManager.flags.EnableBlitterForEnqueueOperations.set(false);
UserEvent userEvent1(&pCmdQ1->getContext()); UserEvent userEvent1(&pCmdQ1->getContext());
cl_event outputEvent1{}; cl_event outputEvent1{};
cl_event inputEvent1 = &userEvent1; cl_event inputEvent1 = &userEvent1;
pCmdQ1->enqueueMarkerWithWaitList( pCmdQ1->enqueueMarkerWithWaitList(
1, 1,
&inputEvent1, &inputEvent1,

View File

@@ -679,30 +679,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenBlock
{ {
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u; auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(pDevice->getRootDeviceEnvironment())) { if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(pDevice->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1; expectedQueueSemaphoresCount += 1;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u); ASSERT_GT(queueSemaphores.size(), 0u);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])); {
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD); auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0]));
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0)); auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
{
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
} }
{ {
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size()); EXPECT_EQ(0u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
} }
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);

View File

@@ -501,7 +501,7 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDiff
EXPECT_EQ(sizeWithEvents, extendedSize); EXPECT_EQ(sizeWithEvents, extendedSize);
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnQueueStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u)); auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
@@ -536,20 +536,24 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr); cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream; auto &cmdStream = *cmdQ1->commandStream;
HardwareParse hwParser; HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0); hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin(); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0); auto expectedQueueSemaphoresCount = 5u;
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0); if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0); expectedQueueSemaphoresCount += 1;
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 5u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp3.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp5.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[2])), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[3])), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[4])), timestamp6.getNode(1), 0);
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenTrackOwnershipUntilQueueIsCompleted) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenTrackOwnershipUntilQueueIsCompleted) {
@@ -955,7 +959,7 @@ HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDiffere
EXPECT_EQ(static_cast<size_t>(eventsOnWaitlist), csrDependencies.timestampPacketContainer.size()); EXPECT_EQ(static_cast<size_t>(eventsOnWaitlist), csrDependencies.timestampPacketContainer.size());
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnQueueStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true; device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@@ -989,27 +993,34 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6}; cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr); cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream; auto &cmdStream = *cmdQ1->commandStream;
HardwareParse hwParser; HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0); hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin(); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0); auto expectedQueueSemaphoresCount = 5u;
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0); if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0); expectedQueueSemaphoresCount += 1;
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 5u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp3.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp5.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[2])), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[3])), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[4])), timestamp6.getNode(1), 0);
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnQueueStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true; auto mockCsr = new MockCsrHw2<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
device->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = true;
mockCsr->storeFlushedTaskStream = true;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
device2->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true; device2->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto context2 = new MockContext(device2.get()); auto context2 = new MockContext(device2.get());
@@ -1027,32 +1038,47 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
cl_event waitlist[] = {&userEvent, &event0, &event1}; cl_event waitlist[] = {&userEvent, &event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr); cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream; auto initialCsrStreamOffset = mockCsr->commandStream.getUsed();
EXPECT_EQ(0u, cmdStream.getUsed()); EXPECT_EQ(0u, initialCsrStreamOffset);
userEvent.setStatus(CL_COMPLETE); userEvent.setStatus(CL_COMPLETE);
cmdQ1->isQueueBlocked(); cmdQ1->isQueueBlocked();
cmdQ2->isQueueBlocked(); cmdQ2->isQueueBlocked();
HardwareParse hwParser; HardwareParse hwParserCsr;
hwParser.parseCommands<FamilyType>(cmdStream, 0); HardwareParse hwParserCmdQ;
LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize);
taskStream.getSpace(mockCsr->storedTaskStreamSize);
hwParserCsr.parseCommands<FamilyType>(mockCsr->commandStream, initialCsrStreamOffset);
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto it = hwParser.cmdList.begin(); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0); auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
while (it != hwParser.cmdList.end()) { expectedQueueSemaphoresCount += 1;
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(0u, csrSemaphores.size());
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
cmdQ2->release(); cmdQ2->release();
context2->release(); context2->release();
} }
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) { HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnQueueStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(executionEnvironment, 1u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true; auto mockCsr = new MockCsrHw2<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
device->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = true;
mockCsr->storeFlushedTaskStream = true;
auto cmdQ1 = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, device.get(), nullptr)); auto cmdQ1 = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, device.get(), nullptr));
@@ -1072,20 +1098,30 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
cl_event waitlist[] = {&userEvent, &event0, &event1}; cl_event waitlist[] = {&userEvent, &event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr); cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
EXPECT_EQ(0u, cmdStream.getUsed()); auto initialCsrStreamOffset = mockCsr->commandStream.getUsed();
EXPECT_EQ(0u, initialCsrStreamOffset);
userEvent.setStatus(CL_COMPLETE); userEvent.setStatus(CL_COMPLETE);
HardwareParse hwParser; HardwareParse hwParserCsr;
hwParser.parseCommands<FamilyType>(cmdStream, 0); HardwareParse hwParserCmdQ;
LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize);
taskStream.getSpace(mockCsr->storedTaskStreamSize);
hwParserCsr.parseCommands<FamilyType>(mockCsr->commandStream, initialCsrStreamOffset);
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto it = hwParser.cmdList.begin(); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0); auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
while (it != hwParser.cmdList.end()) { expectedQueueSemaphoresCount += 1;
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(0u, csrSemaphores.size());
cmdQ2->isQueueBlocked(); cmdQ2->isQueueBlocked();
cmdQ1->isQueueBlocked(); cmdQ1->isQueueBlocked();

View File

@@ -540,18 +540,17 @@ HWTEST_F(TimestampPacketTests, givenBlockedEnqueueWithoutKernelWhenSubmittingThe
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0); hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u; auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) { if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1; expectedQueueSemaphoresCount += 1;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u); ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node0.getNode(0), 0); verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size()); EXPECT_EQ(0u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node1.getNode(0), 0);
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking); EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl); EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
@@ -592,18 +591,17 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWi
hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0); hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size()); EXPECT_EQ(0u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u; auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) { if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1; expectedQueueSemaphoresCount += 1;
} }
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size()); EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u); ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0), 0); verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node2.getNode(0), 0);
} }
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) { HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) {
@@ -637,12 +635,10 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierW
hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0); hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end()); auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size()); EXPECT_EQ(0u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end()); auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u; auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) { if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1; expectedQueueSemaphoresCount += 1;
} }

View File

@@ -13,6 +13,7 @@
#include "shared/test/common/test_macros/header/per_product_test_definitions.h" #include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h" #include "shared/test/common/test_macros/test.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/event/event_builder.h" #include "opencl/source/event/event_builder.h"
#include "opencl/source/helpers/task_information.h" #include "opencl/source/helpers/task_information.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h" #include "opencl/test/unit_test/command_queue/command_queue_fixture.h"