Remove TSP atomic dependency tracking

This reverts commit d17668f023bee409ee68e766bbc7d5f16ce8d52b.

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2021-06-23 10:34:31 +00:00
committed by Compute-Runtime-Automation
parent 13b9f54c7d
commit e1f42c2ae1
24 changed files with 58 additions and 459 deletions

View File

@ -203,7 +203,7 @@ HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) {
}
}
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) {
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphore) {
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
cl_int retVal = CL_SUCCESS;
@ -243,12 +243,9 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph
}
dependenciesFound = true;
EXPECT_FALSE(xyCopyBltCmdFound);
auto miAtomic = genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator));
EXPECT_NE(nullptr, miAtomic);
for (uint32_t i = 1; i < numberOfDependencyContainers * numberNodesPerContainer; i++) {
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*(++cmdIterator)));
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator)));
}
}
}

View File

@ -83,9 +83,9 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) {
auto allocator = csr.getTimestampPacketAllocator();
auto tag = allocator->getTag();
auto expectedOffset = sizeof(typename FamilyType::TimestampPacketType) * 4 * static_cast<size_t>(TimestampPacketSizeControl::preferredPacketCount);
auto expectedOffset = sizeof(typename FamilyType::TimestampPacketType);
EXPECT_EQ(expectedOffset, tag->getImplicitGpuDependenciesCountOffset());
EXPECT_EQ(expectedOffset, tag->getGlobalStartOffset());
}
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {

View File

@ -297,8 +297,6 @@ struct CommandStreamReceiverTagTests : public ::testing::Test {
tag->assignDataToAllTimestamps(i, zeros);
}
EXPECT_TRUE(tag->isCompleted());
bool canBeReleased = tag->canBeReleased();
allocator->returnTag(tag);

View File

@ -1129,8 +1129,6 @@ HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {
ASSERT_EQ(0ULL, timeStamps->GlobalCompleteTS);
ASSERT_EQ(0ULL, timeStamps->ContextCompleteTS);
EXPECT_TRUE(event->getHwTimeStampNode()->isCompleted());
HwTimeStamps *timeStamps2 = static_cast<TagNode<HwTimeStamps> *>(event->getHwTimeStampNode())->tagForCpuAccess;
ASSERT_EQ(timeStamps, timeStamps2);
}

View File

@ -27,9 +27,8 @@
using namespace NEO;
HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) {
HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAddress) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
TimestampPackets<uint32_t> tag;
MockTagNode mockNode;
@ -37,58 +36,16 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreAndAtomicAreProgrammedTh
mockNode.gpuAddress = 0x1230000;
auto &cmdStream = mockCmdQ->getCS(0);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
TimestampPacketHelper::programSemaphore<FamilyType>(cmdStream, mockNode);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
}
HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenCommandIsParsedThenNoAtomicOperation) {
DebugManagerStateRestore restorer;
DebugManager.flags.DisableAtomicForPostSyncs.set(true);
HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgrammedThenUseGpuAddress) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
MockTimestampPacketStorage tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
auto &cmdStream = mockCmdQ->getCS(0);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, 0);
EXPECT_EQ(it, hwParser.cmdList.end());
EXPECT_EQ(0u, mockNode.getImplicitCpuDependenciesCount());
tag.packets[0].contextEnd = 0u;
tag.packets[0].globalEnd = 0u;
EXPECT_FALSE(tag.isCompleted());
}
// Programs a semaphore with an implicit dependency for three devices and checks
// that the node's CPU-side implicit dependency count then equals the device count.
HWTEST_F(TimestampPacketTests, givenMultipleDeviesWhenIncrementingCpuDependenciesThenIncrementMultipleTimes) {
TimestampPackets<uint32_t> tag;
MockTagNode mockNode;
// Back the mock node with the local tag and give it a fixed GPU address.
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
auto &cmdStream = mockCmdQ->getCS(0);
const uint32_t numDevices = 3;
// The third argument is the device count passed to the helper.
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, numDevices);
// NOTE(review): this presumes the counter starts at zero for a fresh MockTagNode - confirm.
EXPECT_EQ(numDevices, mockNode.getImplicitCpuDependenciesCount());
}
HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
TimestampPackets<uint32_t> tag;
tag.setPacketsUsed(2);
@ -97,7 +54,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
mockNode.gpuAddress = 0x1230000;
auto &cmdStream = mockCmdQ->getCS(0);
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
TimestampPacketHelper::programSemaphore<FamilyType>(cmdStream, mockNode);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
@ -105,7 +62,6 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomi
for (uint32_t packetId = 0; packetId < tag.getPacketsUsed(); packetId++) {
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, packetId);
}
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
}
TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorrectValue) {
@ -116,31 +72,6 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr
auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t));
EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode));
auto expectedCounterAddress = mockNode.getGpuAddress() + (TimestampPacketSizeControl::preferredPacketCount * 4 * sizeof(uint32_t));
EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode));
}
// Walks packet 0 through all four combinations of contextEnd/globalEnd in {0, 1}.
// The expectations show isCompleted() following contextEnd only: it is false while
// contextEnd is 1 and true once contextEnd is 0, whatever globalEnd holds.
TEST_F(TimestampPacketSimpleTests, whenContextEndTagIsNotOneThenMarkAsCompleted) {
MockTimestampPacketStorage timestampPacketStorage;
auto &packet = timestampPacketStorage.packets[0];
timestampPacketStorage.initialize();
// contextEnd = 1, globalEnd = 1 -> not completed.
packet.contextEnd = 1;
packet.globalEnd = 1;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
// contextEnd = 1, globalEnd = 0 -> still not completed.
packet.contextEnd = 1;
packet.globalEnd = 0;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
// contextEnd = 0, globalEnd = 1 -> completed.
packet.contextEnd = 0;
packet.globalEnd = 1;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
// contextEnd = 0, globalEnd = 0 -> completed.
packet.contextEnd = 0;
packet.globalEnd = 0;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
}
TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMoveAllNodes) {
@ -178,38 +109,6 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov
EXPECT_EQ(1u, node1.returnCalls);
}
// Checks the completion status of a freshly initialized storage as packet 0 is updated:
// not completed right after initialize(), completed once contextEnd is zeroed,
// and still completed after globalEnd is zeroed as well.
TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) {
MockTimestampPacketStorage timestampPacketStorage;
auto &packet = timestampPacketStorage.packets[0];
timestampPacketStorage.initialize();
// Nothing has been written to the packet yet.
EXPECT_FALSE(timestampPacketStorage.isCompleted());
packet.contextEnd = 0;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
// Zeroing globalEnd afterwards does not change the reported status.
packet.globalEnd = 0;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
}
// With preferredPacketCount - 1 packets marked as used, verifies that isCompleted()
// stays false until the last used packet has its contextEnd zeroed.
TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) {
MockTimestampPacketStorage timestampPacketStorage;
auto &packets = timestampPacketStorage.packets;
timestampPacketStorage.initialize();
timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1);
// Zero both end fields of every used packet except the last one;
// the storage must still report not completed after each step.
for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) {
packets[i].contextEnd = 0;
packets[i].globalEnd = 0;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
}
// Zeroing only globalEnd of the last used packet is not enough.
packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0;
EXPECT_FALSE(timestampPacketStorage.isCompleted());
// Zeroing contextEnd of the last used packet flips the status to completed.
packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0;
EXPECT_TRUE(timestampPacketStorage.isCompleted());
}
TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
MockMemoryManager memoryManager(executionEnvironment);
@ -226,17 +125,12 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
packet.globalEnd = i++;
}
auto &dependenciesCount = firstNode->tagForCpuAccess->implicitGpuDependenciesCount;
setTagToReadyState(firstNode);
allocator.returnTag(firstNode);
dependenciesCount++;
auto secondNode = allocator.getTag();
EXPECT_EQ(secondNode, firstNode);
EXPECT_EQ(0u, dependenciesCount);
EXPECT_EQ(0u, firstNode->getImplicitCpuDependenciesCount());
for (const auto &packet : firstNode->tagForCpuAccess->packets) {
EXPECT_EQ(1u, packet.contextStart);
EXPECT_EQ(1u, packet.globalStart);
@ -339,7 +233,6 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc
auto tag = csr.getTimestampPacketAllocator()->getTag();
setTagToReadyState(tag);
EXPECT_TRUE(tag->isCompleted());
EXPECT_FALSE(tag->canBeReleased());
}
@ -528,31 +421,11 @@ HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorr
size_t sizeForNodeDependency = 0;
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(mockNode);
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
EXPECT_EQ(expectedSize, sizeForNodeDependency);
}
// Verifies that canBeReleased() is true only while the node's CPU-side implicit
// dependency count matches the tag's GPU-side implicit dependency count.
HWTEST_F(TimestampPacketTests, givenTagNodeWhenCpuAndGpuDependenciesCountAreEqualThenCanBeReleased) {
MockTimestampPacketStorage tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
// NOTE(review): setTagToReadyState is defined outside this hunk; presumably it marks
// the packets as completed so that only the counters decide the result - confirm.
setTagToReadyState(&mockNode);
// Counters start out equal -> releasable.
EXPECT_EQ(mockNode.getImplicitCpuDependenciesCount(), tag.getImplicitGpuDependenciesCount());
EXPECT_TRUE(mockNode.canBeReleased());
// CPU count ahead of GPU count -> not releasable.
mockNode.incImplicitCpuDependenciesCount();
EXPECT_FALSE(mockNode.canBeReleased());
// GPU count catches up -> releasable again.
tag.implicitGpuDependenciesCount++;
EXPECT_TRUE(mockNode.canBeReleased());
// GPU count ahead of CPU count -> not releasable either.
tag.implicitGpuDependenciesCount++;
EXPECT_FALSE(mockNode.canBeReleased());
}
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) {
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@ -810,7 +683,6 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDiff
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
@ -851,13 +723,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp4.getNode(0));
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(0));
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(1));
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@ -939,7 +806,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingOnDifferentRootDeviceThenDontProgramSemaphoresOnCsrStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 1u));
@ -974,59 +840,20 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingOnD
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 0);
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 0);
}
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
HWTEST_F(TimestampPacketTests, givenMultipleDevicesOnCsrWhenIncrementingCpuDependenciesCountThenIncrementByTargetCsrDeviceCountValue) {
DeviceBitfield osContext0DeviceBitfiled = 0b011;
DeviceBitfield osContext1DeviceBitfiled = 0b1011;
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream, 0);
UltClDeviceFactory factory{2, 4};
auto it = hwParser.cmdList.begin();
auto osContext0 = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, osContext0DeviceBitfiled, EngineTypeUsage{getChosenEngineType(*defaultHwInfo), EngineUsage::Regular}, PreemptionMode::Disabled, false));
auto osContext1 = std::unique_ptr<OsContext>(OsContext::create(nullptr, 1, osContext1DeviceBitfiled, EngineTypeUsage{getChosenEngineType(*defaultHwInfo), EngineUsage::Regular}, PreemptionMode::Disabled, false));
EXPECT_EQ(2u, osContext0->getNumSupportedDevices());
EXPECT_EQ(3u, osContext1->getNumSupportedDevices());
auto device0 = std::make_unique<MockClDevice>(Device::create<MockDevice>(factory.rootDevices[0]->getExecutionEnvironment(), 0u));
auto device1 = std::make_unique<MockClDevice>(Device::create<MockDevice>(factory.rootDevices[0]->getExecutionEnvironment(), 0u));
device0->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
device0->getUltCommandStreamReceiver<FamilyType>().setupContext(*osContext0);
device1->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
device1->getUltCommandStreamReceiver<FamilyType>().setupContext(*osContext1);
MockContext context0(device0.get());
MockContext context1(device1.get());
MockKernelWithInternals kernel0(*device0, &context0);
MockKernelWithInternals kernel1(*device1, &context1);
auto cmdQ0 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context0, device0.get(), nullptr);
auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context1, device1.get(), nullptr);
const cl_uint eventsOnWaitlist = 2;
MockTimestampPacketContainer timestamp0(*device0->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
MockTimestampPacketContainer timestamp1(*device1->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
Event event0(cmdQ0.get(), 0, 0, 0);
Event event1(cmdQ1.get(), 0, 0, 0);
event0.addTimestampPacketNodes(timestamp0);
event1.addTimestampPacketNodes(timestamp1);
cl_event waitlist[] = {&event0, &event1};
cmdQ0->enqueueKernel(kernel0.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
verifyDependencyCounterValues(event0.getTimestampPacketNodes(), osContext0->getNumSupportedDevices());
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), osContext0->getNumSupportedDevices());
cmdQ1->enqueueKernel(kernel1.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
verifyDependencyCounterValues(event0.getTimestampPacketNodes(), osContext0->getNumSupportedDevices() + osContext1->getNumSupportedDevices());
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), osContext0->getNumSupportedDevices() + osContext1->getNumSupportedDevices());
while (it != hwParser.cmdList.end()) {
auto semaphoreWait = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
if (semaphoreWait) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreWait));
}
it++;
}
}
HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDifferentCsrsThenPushEverything) {
@ -1061,7 +888,6 @@ HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDiffere
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
@ -1101,13 +927,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp4.getNode(0));
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(0));
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(1));
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@ -1148,8 +969,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), timestamp1.getNode(0));
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@ -1193,8 +1012,6 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), timestamp1.getNode(0));
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);
while (it != hwParser.cmdList.end()) {
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
@ -1266,16 +1083,10 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
semaphoresFound++;
if (semaphoresFound == 1) {
verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp3.getNode(0));
verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 2) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(0));
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 3) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(1));
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
}
}
if (genCmdCast<WALKER *>(*it)) {
@ -1349,16 +1160,10 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
semaphoresFound++;
if (semaphoresFound == 1) {
verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp3.getNode(0));
verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 2) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(0));
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
} else if (semaphoresFound == 3) {
verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0);
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(1));
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
}
}
if (genCmdCast<WALKER *>(*it)) {
@ -1421,7 +1226,7 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThen
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
MockTimestampPacketContainer firstNode(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0);
@ -1433,19 +1238,16 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDep
auto firstTag0 = firstNode.getNode(0);
auto firstTag1 = firstNode.getNode(1);
verifyDependencyCounterValues(&firstNode, 0);
cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
verifyDependencyCounterValues(&firstNode, 1);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
auto it = hwParser.cmdList.begin();
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it), firstTag0, 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*++it), firstTag0);
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*++it), firstTag1, 0);
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*++it), firstTag1);
it++;
while (it != hwParser.cmdList.end()) {
auto semaphoreWait = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
@ -1471,18 +1273,13 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDo
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
uint32_t semaphoresFound = 0;
uint32_t atomicsFound = 0;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
semaphoresFound++;
}
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
atomicsFound++;
}
}
uint32_t expectedSemaphoresCount = (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0);
EXPECT_EQ(expectedSemaphoresCount, semaphoresFound);
EXPECT_EQ(0u, atomicsFound);
}
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTimestampPacketDependenciesThenDontKeepDependencyOnPreviousNodeIfItsNotReady) {
@ -1502,18 +1299,13 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTim
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
uint32_t semaphoresFound = 0;
uint32_t atomicsFound = 0;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
semaphoresFound++;
}
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
atomicsFound++;
}
}
uint32_t expectedSemaphoresCount = (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0);
EXPECT_EQ(expectedSemaphoresCount, semaphoresFound);
EXPECT_EQ(0u, atomicsFound);
}
HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueueingThenMakeAllTimestampsResident) {

View File

@ -21,7 +21,6 @@ using namespace NEO;
struct TimestampPacketSimpleTests : public ::testing::Test {
class MockTimestampPacketStorage : public TimestampPackets<uint32_t> {
public:
using TimestampPackets<uint32_t>::implicitGpuDependenciesCount;
using TimestampPackets<uint32_t>::packets;
};
@ -76,24 +75,6 @@ struct TimestampPacketTests : public TimestampPacketSimpleTests {
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
};
// Test helper: checks that a parsed MI_ATOMIC command is a 4-byte increment whose
// target address equals the GPU dependencies-count address of the given node.
//   miAtomicCmd         - command obtained from the parsed command list; expected non-null.
//   timestampPacketNode - node whose dependencies-count GPU address is the expected target.
template <typename GfxFamily>
void verifyMiAtomic(typename GfxFamily::MI_ATOMIC *miAtomicCmd, TagNodeBase *timestampPacketNode) {
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
// NOTE(review): EXPECT_NE is non-fatal, so a null miAtomicCmd is still dereferenced below.
EXPECT_NE(nullptr, miAtomicCmd);
auto writeAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
EXPECT_EQ(writeAddress, UnitTestHelper<GfxFamily>::getAtomicMemoryAddress(*miAtomicCmd));
};
// Test helper: expects the container to hold at least one node and every node's
// CPU-side implicit dependency count to equal expectedValue.
//   timestampPacketContainer - container whose nodes are inspected via peekNodes().
//   expectedValue            - count each node must report.
void verifyDependencyCounterValues(TimestampPacketContainer *timestampPacketContainer, uint32_t expectedValue) {
auto &nodes = timestampPacketContainer->peekNodes();
// Guard against a vacuous pass: with no nodes the loop below would check nothing.
EXPECT_NE(0u, nodes.size());
for (auto &node : nodes) {
EXPECT_EQ(expectedValue, node->getImplicitCpuDependenciesCount());
}
}
ExecutionEnvironment *executionEnvironment;
std::unique_ptr<MockClDevice> device;
MockContext *context;

View File

@ -493,7 +493,6 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBu
HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
@ -512,7 +511,6 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
uint32_t semaphoresCount = 0;
uint32_t miAtomicsCount = 0;
for (auto &cmd : hwParser.cmdList) {
if (auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd)) {
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
@ -521,24 +519,14 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
semaphoresCount++;
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(0u, miAtomicsCount);
} else if (auto miAtomicCmd = genCmdCast<MI_ATOMIC *>(cmd)) {
miAtomicsCount++;
auto dataAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
EXPECT_EQ(dataAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomicCmd));
EXPECT_EQ(1u, semaphoresCount);
}
}
EXPECT_EQ(1u, semaphoresCount);
EXPECT_EQ(1u, miAtomicsCount);
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
@ -557,7 +545,6 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
uint32_t semaphoresCount = 0;
uint32_t miAtomicsCount = 0;
for (auto &cmd : hwParser.cmdList) {
if (auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd)) {
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
@ -566,25 +553,15 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
semaphoresCount++;
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
EXPECT_EQ(0u, miAtomicsCount);
} else if (auto miAtomicCmd = genCmdCast<MI_ATOMIC *>(cmd)) {
miAtomicsCount++;
auto dataAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
EXPECT_EQ(dataAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomicCmd));
EXPECT_EQ(1u, semaphoresCount);
}
}
EXPECT_EQ(1u, semaphoresCount);
EXPECT_EQ(1u, miAtomicsCount);
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
}
template <typename FamilyType>
void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired();

View File

@ -592,7 +592,6 @@ TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterThenValidP
ASSERT_NE(nullptr, perfCounter);
ASSERT_EQ(0ULL, perfCounter->tagForCpuAccess->report[0]);
EXPECT_TRUE(perfCounter->isCompleted());
auto perfCounter2 = event->getHwPerfCounterNode();
ASSERT_EQ(perfCounter, perfCounter2);

View File

@ -172,7 +172,6 @@ AllocateSharedAllocationsWithCpuAndGpuStorage = -1
UseMaxSimdSizeToDeduceMaxWorkgroupSize = 0
ReturnRawGpuTimestamps = 0
ForcePerDssBackedBufferProgramming = 0
DisableAtomicForPostSyncs = 0
MaxHwThreadsPercent = 0
MinHwThreadsUnoccupied = 0
LimitBlitterMaxWidth = -1

View File

@ -34,13 +34,10 @@ struct TagAllocatorTest : public Test<MemoryAllocatorFixture> {
assignDataToAllTimestamps(i, zeros);
}
setPacketsUsed(packetsUsed);
EXPECT_TRUE(isCompleted());
}
void setToNonReadyState() {
packets[0].contextEnd = 1;
EXPECT_FALSE(isCompleted());
}
};
@ -99,6 +96,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
using BaseClass::gfxAllocations;
using BaseClass::populateFreeTags;
using BaseClass::releaseDeferredTags;
using BaseClass::returnTagToDeferredPool;
using BaseClass::rootDeviceIndices;
using BaseClass::TagAllocator;
using BaseClass::usedTags;
@ -386,15 +384,15 @@ TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhen
EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead());
}
TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToDeferredList) {
TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToFreeList) {
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 1, 1, deviceBitfield);
auto node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
node->tagForCpuAccess->setToNonReadyState();
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
tagAllocator.returnTag(node);
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
}
TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMarkedAsNotReady) {
@ -410,6 +408,11 @@ TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMa
tagAllocator.returnTag(node);
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
tagAllocator.releaseDeferredTags();
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
}
TEST_F(TagAllocatorTest, givenTagAllocatorWhenDisabledCompletionCheckThenNodeInheritsItsState) {
@ -443,40 +446,13 @@ TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDe
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 1, 1, deviceBitfield);
auto node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
node->tagForCpuAccess->setToNonReadyState();
tagAllocator.returnTag(node);
node->tagForCpuAccess->setToNonReadyState();
tagAllocator.returnTagToDeferredPool(node);
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
EXPECT_NE(nullptr, node);
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); // empty again - new pool wasnt allocated
}
// Returns two non-ready tags to the allocator and checks that releaseDeferredTags()
// moves a tag from the deferred list to the free list only after that tag has been
// set to the ready state.
TEST_F(TagAllocatorTest, givenTagsOnDeferredListWhenReleasingItThenMoveReadyTagsToFreePool) {
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 2, 1, deviceBitfield); // pool with 2 tags
auto node1 = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
auto node2 = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
// Both tags are returned while still marked non-ready.
node1->tagForCpuAccess->setToNonReadyState();
node2->tagForCpuAccess->setToNonReadyState();
tagAllocator.returnTag(node1);
tagAllocator.returnTag(node2);
// Neither tag is ready: both stay deferred and the free list stays empty.
tagAllocator.releaseDeferredTags();
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
// Only node1 is ready: the free list gains a tag, the deferred list is still non-empty.
node1->tagForCpuAccess->setTagToReadyState();
tagAllocator.releaseDeferredTags();
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
// node2 is ready as well: the deferred list drains completely.
node2->tagForCpuAccess->setTagToReadyState();
tagAllocator.releaseDeferredTags();
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
}
TEST_F(TagAllocatorTest, givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) {
MockTagAllocator<TimestampPackets<uint32_t>> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);
MockTagAllocator<HwTimeStamps> hwTimeStampsAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwTimeStamps), false, mockDeviceBitfield);
@ -571,7 +547,6 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
EXPECT_ANY_THROW(perfCounterNode.getContextStartOffset());
EXPECT_ANY_THROW(perfCounterNode.getContextEndOffset());
EXPECT_ANY_THROW(perfCounterNode.getGlobalEndOffset());
EXPECT_ANY_THROW(perfCounterNode.getImplicitGpuDependenciesCountOffset());
EXPECT_ANY_THROW(perfCounterNode.getContextStartValue(0));
EXPECT_ANY_THROW(perfCounterNode.getGlobalStartValue(0));
EXPECT_ANY_THROW(perfCounterNode.getContextEndValue(0));
@ -580,10 +555,8 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
EXPECT_ANY_THROW(perfCounterNode.getGlobalEndRef());
EXPECT_ANY_THROW(perfCounterNode.setPacketsUsed(0));
EXPECT_ANY_THROW(perfCounterNode.getPacketsUsed());
EXPECT_EQ(0u, perfCounterNode.getImplicitGpuDependenciesCount());
EXPECT_ANY_THROW(perfCounterNode.getSinglePacketSize());
EXPECT_ANY_THROW(perfCounterNode.assignDataToAllTimestamps(0, nullptr));
EXPECT_TRUE(perfCounterNode.isCompleted());
}
{
@ -593,13 +566,10 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
EXPECT_ANY_THROW(hwTimestampNode.getContextStartOffset());
EXPECT_ANY_THROW(hwTimestampNode.getContextEndOffset());
EXPECT_ANY_THROW(hwTimestampNode.getGlobalEndOffset());
EXPECT_ANY_THROW(hwTimestampNode.getImplicitGpuDependenciesCountOffset());
EXPECT_ANY_THROW(hwTimestampNode.setPacketsUsed(0));
EXPECT_ANY_THROW(hwTimestampNode.getPacketsUsed());
EXPECT_EQ(0u, hwTimestampNode.getImplicitGpuDependenciesCount());
EXPECT_ANY_THROW(hwTimestampNode.getSinglePacketSize());
EXPECT_ANY_THROW(hwTimestampNode.assignDataToAllTimestamps(0, nullptr));
EXPECT_TRUE(hwTimestampNode.isCompleted());
EXPECT_ANY_THROW(hwTimestampNode.getQueryHandleRef());
}