feat(ocl): move dependencies programming

Move out of csr dependencies programming to queue stream.
Correct method fillCsrDependenciesForTimestampPacketContainer.

Related-To: NEO-7321

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-02-17 15:36:35 +00:00
committed by Compute-Runtime-Automation
parent 2744501176
commit 22d7c2ea28
10 changed files with 142 additions and 93 deletions

View File

@@ -187,11 +187,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
if (computeCommandStreamReceiver.peekTimestampPacketWriteEnabled()) {
canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies = this->peekLatestSentEnqueueOperation() == EnqueueProperties::Operation::GpuKernel &&
productHelper.isResolveDependenciesByPipeControlsSupported(hwInfo, this->isOOQEnabled());
if (false == clearDependenciesForSubCapture &&
false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr);
if (false == clearDependenciesForSubCapture) {
if (false == canUsePipeControlInsteadOfSemaphoresForOnCsrDependencies) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OnCsr);
}
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(csrDeps, computeCommandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
}
auto allocator = computeCommandStreamReceiver.getTimestampPacketAllocator();
size_t nodesCount = 0u;
@@ -859,7 +860,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && !clearDependenciesForSubCapture) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) {
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
}
@@ -1107,7 +1107,6 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, getGpgpuCommandStreamReceiver(), CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) {
fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
}

View File

@@ -238,6 +238,14 @@ TaskCountType Event::peekBcsTaskCountFromCommandQueue() {
}
}
bool Event::isBcsEvent() const {
return bcsState.isValid() && bcsState.taskCount > 0;
}
aub_stream::EngineType Event::getBcsEngineType() const {
return bcsState.engineType;
}
TaskCountType Event::getCompletionStamp() const {
return this->taskCount;
}

View File

@@ -92,6 +92,8 @@ class Event : public BaseObject<_cl_event>, public IDNode<Event> {
void setupBcs(aub_stream::EngineType bcsEngineType);
TaskCountType peekBcsTaskCountFromCommandQueue();
bool isBcsEvent() const;
aub_stream::EngineType getBcsEngineType() const;
TaskCountType getCompletionStamp() const;
void updateCompletionStamp(TaskCountType taskCount, TaskCountType bcsTaskCount, TaskCountType tasklevel, FlushStamp flushStamp);

View File

@@ -39,16 +39,21 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
continue;
}
auto sameRootDevice = event->getCommandQueue()->getClDevice().getRootDeviceIndex() == currentCsr.getRootDeviceIndex();
const auto sameRootDevice = event->getCommandQueue()->getClDevice().getRootDeviceIndex() == currentCsr.getRootDeviceIndex();
if (!sameRootDevice) {
continue;
}
auto &dependentCsr = event->getCommandQueue()->getGpgpuCommandStreamReceiver();
auto sameCsr = (&dependentCsr == &currentCsr);
bool pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) ||
(CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) ||
(CsrDependencies::DependenciesType::All == depsType);
CommandStreamReceiver *dependentCsr;
if (event->isBcsEvent()) {
dependentCsr = event->getCommandQueue()->getBcsCommandStreamReceiver(event->getBcsEngineType());
} else {
dependentCsr = &event->getCommandQueue()->getGpgpuCommandStreamReceiver();
}
const auto sameCsr = (dependentCsr == &currentCsr);
const auto pushDependency = (CsrDependencies::DependenciesType::OnCsr == depsType && sameCsr) ||
(CsrDependencies::DependenciesType::OutOfCsr == depsType && !sameCsr) ||
(CsrDependencies::DependenciesType::All == depsType);
if (pushDependency) {
csrDeps.timestampPacketContainer.push_back(timestampPacketContainer);
@@ -56,9 +61,9 @@ void EventsRequest::fillCsrDependenciesForTimestampPacketContainer(CsrDependenci
if (!sameCsr) {
const auto &productHelper = event->getCommandQueue()->getDevice().getProductHelper();
if (productHelper.isDcFlushAllowed()) {
if (!dependentCsr.isLatestTaskCountFlushed()) {
flushDependentCsr(dependentCsr, csrDeps);
currentCsr.makeResident(*dependentCsr.getTagAllocation());
if (!dependentCsr->isLatestTaskCountFlushed()) {
flushDependentCsr(*dependentCsr, csrDeps);
currentCsr.makeResident(*dependentCsr->getTagAllocation());
}
}
}

View File

@@ -227,7 +227,6 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (timestampPacketDependencies) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) {
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
}
@@ -403,7 +402,6 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
const bool isHandlingBarrier = commandQueue.getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
if (commandStreamReceiver.peekTimestampPacketWriteEnabled()) {
eventsRequest.fillCsrDependenciesForTimestampPacketContainer(dispatchFlags.csrDependencies, commandStreamReceiver, CsrDependencies::DependenciesType::OutOfCsr);
if (isHandlingBarrier) {
commandQueue.fillCsrDependenciesWithLastBcsPackets(dispatchFlags.csrDependencies);
}

View File

@@ -458,11 +458,13 @@ HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventW
HWTEST_F(CrossDeviceDependenciesTests, givenWaitListWithEventBlockedByUserEventWhenProgrammingSingleDeviceDependenciesForGpgpuCsrThenNoSemaphoreWaitIsProgrammed) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
DebugManagerStateRestore restorer;
DebugManager.flags.EnableBlitterForEnqueueOperations.set(false);
UserEvent userEvent1(&pCmdQ1->getContext());
cl_event outputEvent1{};
cl_event inputEvent1 = &userEvent1;
pCmdQ1->enqueueMarkerWithWaitList(
1,
&inputEvent1,

View File

@@ -679,30 +679,32 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenBlock
{
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u;
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(pDevice->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
{
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node0.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
{
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
}
}
{
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0]));
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*node1.getNode(0));
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(0u, csrSemaphores.size());
}
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);

View File

@@ -501,7 +501,7 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDiff
EXPECT_EQ(sizeWithEvents, extendedSize);
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnQueueStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
@@ -536,20 +536,24 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
auto &cmdStream = *cmdQ1->commandStream;
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
auto expectedQueueSemaphoresCount = 5u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 5u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp3.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp5.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[2])), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[3])), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[4])), timestamp6.getNode(1), 0);
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenTrackOwnershipUntilQueueIsCompleted) {
@@ -955,7 +959,7 @@ HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDiffere
EXPECT_EQ(static_cast<size_t>(eventsOnWaitlist), csrDependencies.timestampPacketContainer.size());
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnQueueStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@@ -989,27 +993,34 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
cl_event waitlist[] = {&event1, &event2, &event3, &event4, &event5, &event6};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
auto &cmdStream = *cmdQ1->commandStream;
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
auto expectedQueueSemaphoresCount = 5u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 5u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp3.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp5.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[2])), timestamp4.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[3])), timestamp6.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[4])), timestamp6.getNode(1), 0);
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlockedThenProgramSemaphoresOnQueueStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto mockCsr = new MockCsrHw2<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
device->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = true;
mockCsr->storeFlushedTaskStream = true;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
device2->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto context2 = new MockContext(device2.get());
@@ -1027,32 +1038,47 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
cl_event waitlist[] = {&userEvent, &event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
EXPECT_EQ(0u, cmdStream.getUsed());
auto initialCsrStreamOffset = mockCsr->commandStream.getUsed();
EXPECT_EQ(0u, initialCsrStreamOffset);
userEvent.setStatus(CL_COMPLETE);
cmdQ1->isQueueBlocked();
cmdQ2->isQueueBlocked();
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
HardwareParse hwParserCsr;
HardwareParse hwParserCmdQ;
LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize);
taskStream.getSpace(mockCsr->storedTaskStreamSize);
hwParserCsr.parseCommands<FamilyType>(mockCsr->commandStream, initialCsrStreamOffset);
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(0u, csrSemaphores.size());
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
EXPECT_EQ(device->getPreemptionMode(), mockCsr->passedDispatchFlags.preemptionMode);
cmdQ2->release();
context2->release();
}
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnCsrStreamOnFlush) {
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingBlockedThenProgramSemaphoresOnQueueStreamOnFlush) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
auto device2 = std::unique_ptr<MockDevice>(Device::create<MockDevice>(executionEnvironment, 1u));
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
auto mockCsr = new MockCsrHw2<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
device->resetCommandStreamReceiver(mockCsr);
mockCsr->timestampPacketWriteEnabled = true;
mockCsr->storeFlushedTaskStream = true;
auto cmdQ1 = clUniquePtr(new MockCommandQueueHw<FamilyType>(context, device.get(), nullptr));
@@ -1072,20 +1098,30 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
cl_event waitlist[] = {&userEvent, &event0, &event1};
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 3, waitlist, nullptr);
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
EXPECT_EQ(0u, cmdStream.getUsed());
auto initialCsrStreamOffset = mockCsr->commandStream.getUsed();
EXPECT_EQ(0u, initialCsrStreamOffset);
userEvent.setStatus(CL_COMPLETE);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
HardwareParse hwParserCsr;
HardwareParse hwParserCmdQ;
LinearStream taskStream(mockCsr->storedTaskStream.get(), mockCsr->storedTaskStreamSize);
taskStream.getSpace(mockCsr->storedTaskStreamSize);
hwParserCsr.parseCommands<FamilyType>(mockCsr->commandStream, initialCsrStreamOffset);
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
it++;
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), timestamp0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), timestamp1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(0u, csrSemaphores.size());
cmdQ2->isQueueBlocked();
cmdQ1->isQueueBlocked();

View File

@@ -540,18 +540,17 @@ HWTEST_F(TimestampPacketTests, givenBlockedEnqueueWithoutKernelWhenSubmittingThe
hwParserCmdQ.parseCommands<FamilyType>(taskStream, 0);
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u;
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u);
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node0.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node1.getNode(0), 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node1.getNode(0), 0);
EXPECT_EQ(0u, csrSemaphores.size());
EXPECT_TRUE(mockCsr->passedDispatchFlags.blocking);
EXPECT_TRUE(mockCsr->passedDispatchFlags.guardCommandBufferWithPipeControl);
@@ -592,18 +591,17 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingMarkerWi
hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
EXPECT_EQ(0u, csrSemaphores.size());
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u;
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}
EXPECT_EQ(expectedQueueSemaphoresCount, queueSemaphores.size());
ASSERT_GT(queueSemaphores.size(), 0u);
ASSERT_GE(queueSemaphores.size(), 2u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[0])), node1.getNode(0), 0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(queueSemaphores[1])), node2.getNode(0), 0);
}
HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierWithoutKernelThenInheritTimestampPacketsAndProgramSemaphores) {
@@ -637,12 +635,10 @@ HWTEST_F(TimestampPacketTests, givenWaitlistAndOutputEventWhenEnqueueingBarrierW
hwParserCmdQ.parseCommands<FamilyType>(*cmdQ->commandStream, 0);
auto csrSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCsr.cmdList.begin(), hwParserCsr.cmdList.end());
EXPECT_EQ(1u, csrSemaphores.size());
ASSERT_GT(csrSemaphores.size(), 0u);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*(csrSemaphores[0])), node2.getNode(0), 0);
EXPECT_EQ(0u, csrSemaphores.size());
auto queueSemaphores = findAll<MI_SEMAPHORE_WAIT *>(hwParserCmdQ.cmdList.begin(), hwParserCmdQ.cmdList.end());
auto expectedQueueSemaphoresCount = 1u;
auto expectedQueueSemaphoresCount = 2u;
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getRootDeviceEnvironment())) {
expectedQueueSemaphoresCount += 1;
}

View File

@@ -13,6 +13,7 @@
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
#include "shared/test/common/test_macros/test.h"
#include "opencl/source/command_queue/enqueue_common.h"
#include "opencl/source/event/event_builder.h"
#include "opencl/source/helpers/task_information.h"
#include "opencl/test/unit_test/command_queue/command_queue_fixture.h"