mirror of
https://github.com/intel/compute-runtime.git
synced 2025-11-10 05:49:51 +08:00
Revert TSP changes
This commit reverts the following commits: a1d2bdc76666059653c79fe39a26113ce47c632a, 71a115129c1698ff15305fd0ea3828cba861be47, e1a9087a466bfba54d84a64247e6596092034a91.
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
48feca4f44
commit
2e97aeccfd
@@ -80,7 +80,7 @@ class BuiltInOp<EBuiltInOps::AuxTranslation> : public BuiltinDispatchInfoBuilder
|
||||
using RegisteredMethodDispatcherT = RegisteredMethodDispatcher<DispatchInfo::DispatchCommandMethodT,
|
||||
DispatchInfo::EstimateCommandsMethodT>;
|
||||
template <typename GfxFamily, bool dcFlush>
|
||||
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) {
|
||||
static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &, uint32_t) {
|
||||
PipeControlArgs args(dcFlush);
|
||||
MemorySynchronizationCommands<GfxFamily>::addPipeControl(linearStream, args);
|
||||
}
|
||||
|
||||
@@ -137,13 +137,13 @@ bool CommandQueueHw<Family>::isCacheFlushForBcsRequired() const {
|
||||
template <typename Family>
|
||||
void CommandQueueHw<Family>::setupBlitAuxTranslation(MultiDispatchInfo &multiDispatchInfo) {
|
||||
multiDispatchInfo.begin()->dispatchInitCommands.registerMethod(
|
||||
TimestampPacketHelper::programSemaphoreForAuxTranslation<Family, AuxTranslationDirection::AuxToNonAux>);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::AuxToNonAux>);
|
||||
|
||||
multiDispatchInfo.begin()->dispatchInitCommands.registerCommandsSizeEstimationMethod(
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family, AuxTranslationDirection::AuxToNonAux>);
|
||||
|
||||
multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerMethod(
|
||||
TimestampPacketHelper::programSemaphoreForAuxTranslation<Family, AuxTranslationDirection::NonAuxToAux>);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependencyForAuxTranslation<Family, AuxTranslationDirection::NonAuxToAux>);
|
||||
|
||||
multiDispatchInfo.rbegin()->dispatchEpilogueCommands.registerCommandsSizeEstimationMethod(
|
||||
TimestampPacketHelper::getRequiredCmdStreamSizeForAuxTranslationNodeDependency<Family, AuxTranslationDirection::NonAuxToAux>);
|
||||
|
||||
@@ -275,7 +275,10 @@ void CommandQueueHw<GfxFamily>::enqueueHandler(Surface **surfacesForResidency,
|
||||
}
|
||||
|
||||
if (flushDependenciesForNonKernelCommand) {
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, csrDeps);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(
|
||||
commandStream,
|
||||
csrDeps,
|
||||
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
}
|
||||
|
||||
if (isMarkerWithProfiling) {
|
||||
@@ -504,7 +507,8 @@ BlitProperties CommandQueueHw<GfxFamily>::processDispatchForBlitEnqueue(const Mu
|
||||
args);
|
||||
}
|
||||
|
||||
TimestampPacketHelper::programSemaphore<GfxFamily>(*commandStream, *currentTimestampPacketNode);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(*commandStream, *currentTimestampPacketNode,
|
||||
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
}
|
||||
return blitProperties;
|
||||
}
|
||||
@@ -562,7 +566,10 @@ void CommandQueueHw<GfxFamily>::processDispatchForCacheFlush(Surface **surfaces,
|
||||
LinearStream *commandStream,
|
||||
CsrDependencies &csrDeps) {
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDeps);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(
|
||||
*commandStream,
|
||||
csrDeps,
|
||||
getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices());
|
||||
|
||||
uint64_t postSyncAddress = 0;
|
||||
if (getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
|
||||
@@ -106,7 +106,8 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
mainKernel->areMultipleSubDevicesInContext());
|
||||
}
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDependencies);
|
||||
auto numSupportedDevices = commandQueue.getGpgpuCommandStreamReceiver().getOsContext().getNumSupportedDevices();
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(*commandStream, csrDependencies, numSupportedDevices);
|
||||
|
||||
dsh->align(EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
|
||||
|
||||
@@ -140,7 +141,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
|
||||
size_t currentDispatchIndex = 0;
|
||||
for (auto &dispatchInfo : multiDispatchInfo) {
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
dispatchInfo.dispatchInitCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo(), numSupportedDevices);
|
||||
bool isMainKernel = (dispatchInfo.getKernel() == mainKernel);
|
||||
|
||||
dispatchKernelCommands(commandQueue, dispatchInfo, commandType, *commandStream, isMainKernel,
|
||||
@@ -148,7 +149,7 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
offsetInterfaceDescriptorTable, *dsh, *ioh, *ssh);
|
||||
|
||||
currentDispatchIndex++;
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo());
|
||||
dispatchInfo.dispatchEpilogueCommands(*commandStream, timestampPacketDependencies, commandQueue.getDevice().getHardwareInfo(), numSupportedDevices);
|
||||
}
|
||||
|
||||
if (mainKernel->requiresCacheFlushCommand(commandQueue)) {
|
||||
|
||||
@@ -28,7 +28,7 @@ struct TimestampPacketDependencies;
|
||||
class DispatchInfo {
|
||||
|
||||
public:
|
||||
using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies, const HardwareInfo &);
|
||||
using DispatchCommandMethodT = void(LinearStream &commandStream, TimestampPacketDependencies *timestampPacketDependencies, const HardwareInfo &, uint32_t);
|
||||
using EstimateCommandsMethodT = size_t(size_t, const HardwareInfo &, bool);
|
||||
|
||||
DispatchInfo() = default;
|
||||
|
||||
@@ -1150,12 +1150,10 @@ bool Kernel::hasTunningFinished(KernelSubmissionData &submissionData) {
|
||||
|
||||
bool Kernel::hasRunFinished(TimestampPacketContainer *timestampContainer) {
|
||||
for (const auto &node : timestampContainer->peekNodes()) {
|
||||
for (uint32_t i = 0; i < node->getPacketsUsed(); i++) {
|
||||
if (node->getContextEndValue(i) == 1) {
|
||||
if (!node->isCompleted()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -203,7 +203,7 @@ HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) {
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphore) {
|
||||
HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
@@ -243,9 +243,12 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph
|
||||
}
|
||||
dependenciesFound = true;
|
||||
EXPECT_FALSE(xyCopyBltCmdFound);
|
||||
auto miAtomic = genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator));
|
||||
EXPECT_NE(nullptr, miAtomic);
|
||||
|
||||
for (uint32_t i = 1; i < numberOfDependencyContainers * numberNodesPerContainer; i++) {
|
||||
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*(++cmdIterator)));
|
||||
EXPECT_NE(nullptr, genCmdCast<typename FamilyType::MI_ATOMIC *>(*(++cmdIterator)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,9 +83,9 @@ HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) {
|
||||
auto allocator = csr.getTimestampPacketAllocator();
|
||||
auto tag = allocator->getTag();
|
||||
|
||||
auto expectedOffset = sizeof(typename FamilyType::TimestampPacketType);
|
||||
auto expectedOffset = sizeof(typename FamilyType::TimestampPacketType) * 4 * static_cast<size_t>(TimestampPacketSizeControl::preferredPacketCount);
|
||||
|
||||
EXPECT_EQ(expectedOffset, tag->getGlobalStartOffset());
|
||||
EXPECT_EQ(expectedOffset, tag->getImplicitGpuDependenciesCountOffset());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenFlagsAreSetCorrectly) {
|
||||
|
||||
@@ -297,6 +297,8 @@ struct CommandStreamReceiverTagTests : public ::testing::Test {
|
||||
tag->assignDataToAllTimestamps(i, zeros);
|
||||
}
|
||||
|
||||
EXPECT_TRUE(tag->isCompleted());
|
||||
|
||||
bool canBeReleased = tag->canBeReleased();
|
||||
allocator->returnTag(tag);
|
||||
|
||||
|
||||
@@ -1129,6 +1129,8 @@ HWTEST_F(EventTest, WhenGettingHwTimeStampsThenValidPointerIsReturned) {
|
||||
ASSERT_EQ(0ULL, timeStamps->GlobalCompleteTS);
|
||||
ASSERT_EQ(0ULL, timeStamps->ContextCompleteTS);
|
||||
|
||||
EXPECT_TRUE(event->getHwTimeStampNode()->isCompleted());
|
||||
|
||||
HwTimeStamps *timeStamps2 = static_cast<TagNode<HwTimeStamps> *>(event->getHwTimeStampNode())->tagForCpuAccess;
|
||||
ASSERT_EQ(timeStamps, timeStamps2);
|
||||
}
|
||||
|
||||
@@ -27,8 +27,9 @@
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAddress) {
|
||||
HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
TimestampPackets<uint32_t> tag;
|
||||
MockTagNode mockNode;
|
||||
@@ -36,16 +37,58 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAd
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
TimestampPacketHelper::programSemaphore<FamilyType>(cmdStream, mockNode);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgrammedThenUseGpuAddress) {
|
||||
// With the DisableAtomicForPostSyncs debug flag set, programming a semaphore with
// an implicit dependency must emit only the MI_SEMAPHORE_WAIT: the parsed command
// list ends right after it and the node's CPU-side dependency counter stays at 0.
HWTEST_F(TimestampPacketTests, givenDebugModeWhereAtomicsAreNotEmittedWhenCommandIsParsedThenNoAtomicOperation) {
|
||||
// NOTE(review): presumably restores the debug flags when the test scope ends - confirm.
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.DisableAtomicForPostSyncs.set(true);
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
MockTimestampPacketStorage tag;
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
// Program the dependency for a single device (last argument).
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
auto it = hwParser.cmdList.begin();
|
||||
// The first (and only) command is the semaphore for packet 0.
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, 0);
|
||||
// Nothing follows the semaphore, i.e. no MI_ATOMIC was emitted.
EXPECT_EQ(it, hwParser.cmdList.end());
|
||||
EXPECT_EQ(0u, mockNode.getImplicitCpuDependenciesCount());
|
||||
tag.packets[0].contextEnd = 0u;
|
||||
tag.packets[0].globalEnd = 0u;
|
||||
|
||||
// Even with both end values of packet 0 zeroed, the tag is expected to report
// "not completed" in this mode.
EXPECT_FALSE(tag.isCompleted());
|
||||
}
|
||||
|
||||
// Programming a semaphore with an implicit dependency for numDevices devices is
// expected to raise the node's CPU-side dependency counter by numDevices.
HWTEST_F(TimestampPacketTests, givenMultipleDeviesWhenIncrementingCpuDependenciesThenIncrementMultipleTimes) {
|
||||
TimestampPackets<uint32_t> tag;
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
const uint32_t numDevices = 3;
|
||||
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, numDevices);
|
||||
|
||||
// The counter starts from a fresh mock node, so it must now equal numDevices.
EXPECT_EQ(numDevices, mockNode.getImplicitCpuDependenciesCount());
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreAndAtomicAreProgrammedThenUseGpuAddress) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
TimestampPackets<uint32_t> tag;
|
||||
tag.setPacketsUsed(2);
|
||||
@@ -54,7 +97,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgra
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
auto &cmdStream = mockCmdQ->getCS(0);
|
||||
|
||||
TimestampPacketHelper::programSemaphore<FamilyType>(cmdStream, mockNode);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<FamilyType>(cmdStream, mockNode, 1);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
@@ -62,6 +105,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgra
|
||||
for (uint32_t packetId = 0; packetId < tag.getPacketsUsed(); packetId++) {
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), &mockNode, packetId);
|
||||
}
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), &mockNode);
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorrectValue) {
|
||||
@@ -72,6 +116,31 @@ TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorr
|
||||
|
||||
auto expectedEndAddress = mockNode.getGpuAddress() + (2 * sizeof(uint32_t));
|
||||
EXPECT_EQ(expectedEndAddress, TimestampPacketHelper::getContextEndGpuAddress(mockNode));
|
||||
|
||||
auto expectedCounterAddress = mockNode.getGpuAddress() + (TimestampPacketSizeControl::preferredPacketCount * 4 * sizeof(uint32_t));
|
||||
EXPECT_EQ(expectedCounterAddress, TimestampPacketHelper::getGpuDependenciesCountGpuAddress(mockNode));
|
||||
}
|
||||
|
||||
// Walks all four combinations of contextEnd/globalEnd (values 0 and 1) on packet 0
// and checks that isCompleted() follows contextEnd only: it is false while
// contextEnd == 1 and true once contextEnd == 0, whatever globalEnd holds.
TEST_F(TimestampPacketSimpleTests, whenContextEndTagIsNotOneThenMarkAsCompleted) {
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packet = timestampPacketStorage.packets[0];
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
// contextEnd == 1, globalEnd == 1 -> not completed
packet.contextEnd = 1;
|
||||
packet.globalEnd = 1;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
|
||||
// contextEnd == 1, globalEnd == 0 -> still not completed
packet.contextEnd = 1;
|
||||
packet.globalEnd = 0;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
|
||||
// contextEnd == 0, globalEnd == 1 -> completed
packet.contextEnd = 0;
|
||||
packet.globalEnd = 1;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
|
||||
// contextEnd == 0, globalEnd == 0 -> completed
packet.contextEnd = 0;
|
||||
packet.globalEnd = 0;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMoveAllNodes) {
|
||||
@@ -109,6 +178,38 @@ TEST_F(TimestampPacketSimpleTests, givenTimestampPacketContainerWhenMovedThenMov
|
||||
EXPECT_EQ(1u, node1.returnCalls);
|
||||
}
|
||||
|
||||
// A freshly initialized storage reports "not completed"; clearing contextEnd of
// packet 0 flips it to completed, and clearing globalEnd afterwards keeps it so.
TEST_F(TimestampPacketSimpleTests, whenIsCompletedIsCalledThenItReturnsProperTimestampPacketStatus) {
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packet = timestampPacketStorage.packets[0];
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
// State right after initialize().
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
packet.contextEnd = 0;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
packet.globalEnd = 0;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
// With several packets marked as used, isCompleted() must stay false until the
// contextEnd of the last used packet is cleared as well.
TEST_F(TimestampPacketSimpleTests, givenMultiplePacketsInUseWhenCompletionIsCheckedThenVerifyAllUsedNodes) {
|
||||
MockTimestampPacketStorage timestampPacketStorage;
|
||||
auto &packets = timestampPacketStorage.packets;
|
||||
timestampPacketStorage.initialize();
|
||||
|
||||
timestampPacketStorage.setPacketsUsed(TimestampPacketSizeControl::preferredPacketCount - 1);
|
||||
|
||||
// Clear every used packet except the last one; the storage must remain incomplete
// after each step.
for (uint32_t i = 0; i < timestampPacketStorage.getPacketsUsed() - 1; i++) {
|
||||
packets[i].contextEnd = 0;
|
||||
packets[i].globalEnd = 0;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
// Clearing only globalEnd of the last used packet is not enough.
packets[timestampPacketStorage.getPacketsUsed() - 1].globalEnd = 0;
|
||||
EXPECT_FALSE(timestampPacketStorage.isCompleted());
|
||||
|
||||
// Clearing its contextEnd completes the whole storage.
packets[timestampPacketStorage.getPacketsUsed() - 1].contextEnd = 0;
|
||||
EXPECT_TRUE(timestampPacketStorage.isCompleted());
|
||||
}
|
||||
|
||||
TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
|
||||
MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
|
||||
MockMemoryManager memoryManager(executionEnvironment);
|
||||
@@ -125,12 +226,17 @@ TEST_F(TimestampPacketSimpleTests, whenNewTagIsTakenThenReinitialize) {
|
||||
packet.globalEnd = i++;
|
||||
}
|
||||
|
||||
auto &dependenciesCount = firstNode->tagForCpuAccess->implicitGpuDependenciesCount;
|
||||
|
||||
setTagToReadyState(firstNode);
|
||||
allocator.returnTag(firstNode);
|
||||
dependenciesCount++;
|
||||
|
||||
auto secondNode = allocator.getTag();
|
||||
EXPECT_EQ(secondNode, firstNode);
|
||||
|
||||
EXPECT_EQ(0u, dependenciesCount);
|
||||
EXPECT_EQ(0u, firstNode->getImplicitCpuDependenciesCount());
|
||||
for (const auto &packet : firstNode->tagForCpuAccess->packets) {
|
||||
EXPECT_EQ(1u, packet.contextStart);
|
||||
EXPECT_EQ(1u, packet.globalStart);
|
||||
@@ -233,6 +339,7 @@ HWTEST_F(TimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacketAlloc
|
||||
auto tag = csr.getTimestampPacketAllocator()->getTag();
|
||||
setTagToReadyState(tag);
|
||||
|
||||
EXPECT_TRUE(tag->isCompleted());
|
||||
EXPECT_FALSE(tag->canBeReleased());
|
||||
}
|
||||
|
||||
@@ -421,11 +528,31 @@ HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorr
|
||||
size_t sizeForNodeDependency = 0;
|
||||
sizeForNodeDependency += TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependency<FamilyType>(mockNode);
|
||||
|
||||
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT);
|
||||
size_t expectedSize = mockNode.tagForCpuAccess->getPacketsUsed() * sizeof(typename FamilyType::MI_SEMAPHORE_WAIT) + sizeof(typename FamilyType::MI_ATOMIC);
|
||||
|
||||
EXPECT_EQ(expectedSize, sizeForNodeDependency);
|
||||
}
|
||||
|
||||
// A ready tag node may be released only while its CPU-side implicit dependency
// counter equals the GPU-side counter stored in the tag; any mismatch, in either
// direction, blocks the release.
HWTEST_F(TimestampPacketTests, givenTagNodeWhenCpuAndGpuDependenciesCountAreEqualThenCanBeReleased) {
|
||||
MockTimestampPacketStorage tag;
|
||||
MockTagNode mockNode;
|
||||
mockNode.tagForCpuAccess = &tag;
|
||||
mockNode.gpuAddress = 0x1230000;
|
||||
|
||||
setTagToReadyState(&mockNode);
|
||||
// Counters match right after the node is put into the ready state.
EXPECT_EQ(mockNode.getImplicitCpuDependenciesCount(), tag.getImplicitGpuDependenciesCount());
|
||||
EXPECT_TRUE(mockNode.canBeReleased());
|
||||
|
||||
// CPU counter ahead of the GPU counter -> cannot be released.
mockNode.incImplicitCpuDependenciesCount();
|
||||
EXPECT_FALSE(mockNode.canBeReleased());
|
||||
|
||||
// GPU counter catches up -> releasable again.
tag.implicitGpuDependenciesCount++;
|
||||
EXPECT_TRUE(mockNode.canBeReleased());
|
||||
|
||||
// GPU counter ahead of the CPU counter -> cannot be released either.
tag.implicitGpuDependenciesCount++;
|
||||
EXPECT_FALSE(mockNode.canBeReleased());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispatchingGpuWalkerThenAddTwoPcForLastWalker) {
|
||||
using GPGPU_WALKER = typename FamilyType::GPGPU_WALKER;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
@@ -683,6 +810,7 @@ HWTEST_F(TimestampPacketTests, givenEventsRequestWhenEstimatingStreamSizeForDiff
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 0u));
|
||||
|
||||
@@ -723,8 +851,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp4.getNode(0));
|
||||
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(0));
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(1));
|
||||
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
|
||||
@@ -790,6 +923,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingOnDifferentRootDeviceThenDontProgramSemaphoresOnCsrStream) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
auto device2 = std::make_unique<MockClDevice>(Device::create<MockDevice>(executionEnvironment, 1u));
|
||||
|
||||
@@ -824,20 +958,59 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingOnD
|
||||
|
||||
cmdQ1->enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
|
||||
|
||||
auto &cmdStream = device->getUltCommandStreamReceiver<FamilyType>().commandStream;
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream, 0);
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
auto semaphoreWait = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
|
||||
if (semaphoreWait) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreWait));
|
||||
}
|
||||
it++;
|
||||
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 0);
|
||||
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 0);
|
||||
}
|
||||
|
||||
// Two command queues run on CSRs whose OS contexts cover a different number of
// devices (2 and 3). Each enqueue that waits on the two events must raise the
// dependency counters of the events' timestamp nodes by the device count of the
// OS context of the CSR doing the enqueue.
HWTEST_F(TimestampPacketTests, givenMultipleDevicesOnCsrWhenIncrementingCpuDependenciesCountThenIncrementByTargetCsrDeviceCountValue) {
|
||||
// Two bits set -> 2 supported devices; three bits set -> 3 (asserted below).
DeviceBitfield osContext0DeviceBitfiled = 0b011;
|
||||
DeviceBitfield osContext1DeviceBitfiled = 0b1011;
|
||||
|
||||
// NOTE(review): presumably 2 root devices with 4 sub-devices each - confirm
// against the UltClDeviceFactory constructor.
UltClDeviceFactory factory{2, 4};
|
||||
|
||||
auto osContext0 = std::unique_ptr<OsContext>(OsContext::create(nullptr, 0, osContext0DeviceBitfiled, EngineTypeUsage{getChosenEngineType(*defaultHwInfo), EngineUsage::Regular}, PreemptionMode::Disabled, false));
|
||||
auto osContext1 = std::unique_ptr<OsContext>(OsContext::create(nullptr, 1, osContext1DeviceBitfiled, EngineTypeUsage{getChosenEngineType(*defaultHwInfo), EngineUsage::Regular}, PreemptionMode::Disabled, false));
|
||||
EXPECT_EQ(2u, osContext0->getNumSupportedDevices());
|
||||
EXPECT_EQ(3u, osContext1->getNumSupportedDevices());
|
||||
|
||||
// Both mock devices are created from the same execution environment with root
// device index 0.
auto device0 = std::make_unique<MockClDevice>(Device::create<MockDevice>(factory.rootDevices[0]->getExecutionEnvironment(), 0u));
|
||||
auto device1 = std::make_unique<MockClDevice>(Device::create<MockDevice>(factory.rootDevices[0]->getExecutionEnvironment(), 0u));
|
||||
|
||||
// Enable timestamp packets and attach the custom OS contexts to each CSR.
device0->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
device0->getUltCommandStreamReceiver<FamilyType>().setupContext(*osContext0);
|
||||
device1->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
device1->getUltCommandStreamReceiver<FamilyType>().setupContext(*osContext1);
|
||||
|
||||
MockContext context0(device0.get());
|
||||
MockContext context1(device1.get());
|
||||
MockKernelWithInternals kernel0(*device0, &context0);
|
||||
MockKernelWithInternals kernel1(*device1, &context1);
|
||||
|
||||
auto cmdQ0 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context0, device0.get(), nullptr);
|
||||
auto cmdQ1 = std::make_unique<MockCommandQueueHw<FamilyType>>(&context1, device1.get(), nullptr);
|
||||
|
||||
const cl_uint eventsOnWaitlist = 2;
|
||||
MockTimestampPacketContainer timestamp0(*device0->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
|
||||
MockTimestampPacketContainer timestamp1(*device1->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 1);
|
||||
|
||||
// One event per queue, each carrying one timestamp node.
Event event0(cmdQ0.get(), 0, 0, 0);
|
||||
Event event1(cmdQ1.get(), 0, 0, 0);
|
||||
event0.addTimestampPacketNodes(timestamp0);
|
||||
event1.addTimestampPacketNodes(timestamp1);
|
||||
|
||||
cl_event waitlist[] = {&event0, &event1};
|
||||
|
||||
// First enqueue on queue 0: both events gain osContext0's device count.
cmdQ0->enqueueKernel(kernel0.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
|
||||
|
||||
verifyDependencyCounterValues(event0.getTimestampPacketNodes(), osContext0->getNumSupportedDevices());
|
||||
|
||||
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), osContext0->getNumSupportedDevices());
|
||||
|
||||
// Second enqueue on queue 1: osContext1's device count is added on top.
cmdQ1->enqueueKernel(kernel1.mockKernel, 1, nullptr, gws, nullptr, eventsOnWaitlist, waitlist, nullptr);
|
||||
|
||||
verifyDependencyCounterValues(event0.getTimestampPacketNodes(), osContext0->getNumSupportedDevices() + osContext1->getNumSupportedDevices());
|
||||
|
||||
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), osContext0->getNumSupportedDevices() + osContext1->getNumSupportedDevices());
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDifferentCsrsThenPushEverything) {
|
||||
@@ -872,6 +1045,7 @@ HWTEST_F(TimestampPacketTests, givenAllDependencyTypesModeWhenFillingFromDiffere
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFromOneDeviceWhenEnqueueingThenProgramSemaphoresOnCsrStream) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
|
||||
@@ -911,8 +1085,13 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp4.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp4.getNode(0));
|
||||
verifyDependencyCounterValues(event4.getTimestampPacketNodes(), 1);
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(0));
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp6.getNode(1), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*it++), timestamp6.getNode(1));
|
||||
verifyDependencyCounterValues(event6.getTimestampPacketNodes(), 1);
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
|
||||
@@ -953,6 +1132,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingBlo
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), timestamp1.getNode(0));
|
||||
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
|
||||
@@ -996,6 +1177,8 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it++), timestamp1.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*it++), timestamp1.getNode(0));
|
||||
verifyDependencyCounterValues(event1.getTimestampPacketNodes(), 1);
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
EXPECT_EQ(nullptr, genCmdCast<MI_SEMAPHORE_WAIT *>(*it));
|
||||
@@ -1067,10 +1250,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenDispatchingTh
|
||||
semaphoresFound++;
|
||||
if (semaphoresFound == 1) {
|
||||
verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp3.getNode(0));
|
||||
verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
|
||||
} else if (semaphoresFound == 2) {
|
||||
verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(0));
|
||||
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
|
||||
} else if (semaphoresFound == 3) {
|
||||
verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(1));
|
||||
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
|
||||
}
|
||||
}
|
||||
if (genCmdCast<WALKER *>(*it)) {
|
||||
@@ -1144,10 +1333,16 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledOnDifferentCSRsFr
|
||||
semaphoresFound++;
|
||||
if (semaphoresFound == 1) {
|
||||
verifySemaphore(semaphoreCmd, timestamp3.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp3.getNode(0));
|
||||
verifyDependencyCounterValues(event3.getTimestampPacketNodes(), 1);
|
||||
} else if (semaphoresFound == 2) {
|
||||
verifySemaphore(semaphoreCmd, timestamp5.getNode(0), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(0));
|
||||
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
|
||||
} else if (semaphoresFound == 3) {
|
||||
verifySemaphore(semaphoreCmd, timestamp5.getNode(1), 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<typename FamilyType::MI_ATOMIC *>(*++it), timestamp5.getNode(1));
|
||||
verifyDependencyCounterValues(event5.getTimestampPacketNodes(), 1);
|
||||
}
|
||||
}
|
||||
if (genCmdCast<WALKER *>(*it)) {
|
||||
@@ -1208,9 +1403,38 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingBlockedThen
|
||||
cmdQ->isQueueBlocked();
|
||||
}
|
||||
|
||||
// When the queue's previously assigned timestamp node is already in the ready
// state, enqueueing a kernel must not program a dependency on it: no MI_ATOMIC is
// expected in the command stream, and the only MI_SEMAPHORE_WAITs allowed are the
// additional ones some hardware requires.
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenDontKeepDependencyOnPreviousNodeIfItsReady) {
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
|
||||
MockCommandQueueHw<FamilyType> cmdQ(context, device.get(), nullptr);
|
||||
TimestampPacketContainer previousNodes;
|
||||
// Give the queue one node up front and mark it as ready before the enqueue.
cmdQ.obtainNewTimestampPacketNodes(1, previousNodes, false, false);
|
||||
auto firstNode = cmdQ.timestampPacketContainer->peekNodes().at(0);
|
||||
setTagToReadyState(firstNode);
|
||||
|
||||
cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
|
||||
|
||||
// Count every semaphore and atomic command in the queue's command stream.
uint32_t semaphoresFound = 0;
|
||||
uint32_t atomicsFound = 0;
|
||||
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
|
||||
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
|
||||
semaphoresFound++;
|
||||
}
|
||||
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
|
||||
atomicsFound++;
|
||||
}
|
||||
}
|
||||
// Two semaphores are tolerated only where the hardware needs the additional
// MI_SEMAPHORE_WAIT; otherwise none are expected.
uint32_t expectedSemaphoresCount = (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0);
|
||||
EXPECT_EQ(expectedSemaphoresCount, semaphoresFound);
|
||||
EXPECT_EQ(0u, atomicsFound);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDependencyOnPreviousNodeIfItsNotReady) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
device->getUltCommandStreamReceiver<FamilyType>().timestampPacketWriteEnabled = true;
|
||||
MockTimestampPacketContainer firstNode(*device->getGpgpuCommandStreamReceiver().getTimestampPacketAllocator(), 0);
|
||||
|
||||
@@ -1222,16 +1446,19 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingThenKeepDep
|
||||
auto firstTag0 = firstNode.getNode(0);
|
||||
auto firstTag1 = firstNode.getNode(1);
|
||||
|
||||
verifyDependencyCounterValues(&firstNode, 0);
|
||||
cmdQ.enqueueKernel(kernel->mockKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
verifyDependencyCounterValues(&firstNode, 1);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
|
||||
|
||||
auto it = hwParser.cmdList.begin();
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*it), firstTag0, 0);
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*++it), firstTag0);
|
||||
|
||||
verifySemaphore(genCmdCast<MI_SEMAPHORE_WAIT *>(*++it), firstTag1, 0);
|
||||
it++;
|
||||
verifyMiAtomic<FamilyType>(genCmdCast<MI_ATOMIC *>(*++it), firstTag1);
|
||||
|
||||
while (it != hwParser.cmdList.end()) {
|
||||
auto semaphoreWait = genCmdCast<MI_SEMAPHORE_WAIT *>(*it);
|
||||
@@ -1257,13 +1484,18 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingToOoqThenDo
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
|
||||
|
||||
uint32_t semaphoresFound = 0;
|
||||
uint32_t atomicsFound = 0;
|
||||
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
|
||||
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
|
||||
semaphoresFound++;
|
||||
}
|
||||
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
|
||||
atomicsFound++;
|
||||
}
|
||||
}
|
||||
uint32_t expectedSemaphoresCount = (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0);
|
||||
EXPECT_EQ(expectedSemaphoresCount, semaphoresFound);
|
||||
EXPECT_EQ(0u, atomicsFound);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTimestampPacketDependenciesThenDontKeepDependencyOnPreviousNodeIfItsNotReady) {
|
||||
@@ -1283,13 +1515,18 @@ HWTEST_F(TimestampPacketTests, givenAlreadyAssignedNodeWhenEnqueueingWithOmitTim
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ.commandStream, 0);
|
||||
|
||||
uint32_t semaphoresFound = 0;
|
||||
uint32_t atomicsFound = 0;
|
||||
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
|
||||
if (genCmdCast<typename FamilyType::MI_SEMAPHORE_WAIT *>(*it)) {
|
||||
semaphoresFound++;
|
||||
}
|
||||
if (genCmdCast<typename FamilyType::MI_ATOMIC *>(*it)) {
|
||||
atomicsFound++;
|
||||
}
|
||||
}
|
||||
uint32_t expectedSemaphoresCount = (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWaitRequired(device->getHardwareInfo()) ? 2 : 0);
|
||||
EXPECT_EQ(expectedSemaphoresCount, semaphoresFound);
|
||||
EXPECT_EQ(0u, atomicsFound);
|
||||
}
|
||||
|
||||
HWTEST_F(TimestampPacketTests, givenEventsWaitlistFromDifferentDevicesWhenEnqueueingThenMakeAllTimestampsResident) {
|
||||
|
||||
@@ -21,6 +21,7 @@ using namespace NEO;
|
||||
struct TimestampPacketSimpleTests : public ::testing::Test {
|
||||
class MockTimestampPacketStorage : public TimestampPackets<uint32_t> {
|
||||
public:
|
||||
using TimestampPackets<uint32_t>::implicitGpuDependenciesCount;
|
||||
using TimestampPackets<uint32_t>::packets;
|
||||
};
|
||||
|
||||
@@ -75,6 +76,24 @@ struct TimestampPacketTests : public TimestampPacketSimpleTests {
|
||||
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
void verifyMiAtomic(typename GfxFamily::MI_ATOMIC *miAtomicCmd, TagNodeBase *timestampPacketNode) {
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
EXPECT_NE(nullptr, miAtomicCmd);
|
||||
auto writeAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
|
||||
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
|
||||
EXPECT_EQ(writeAddress, UnitTestHelper<GfxFamily>::getAtomicMemoryAddress(*miAtomicCmd));
|
||||
};
|
||||
|
||||
void verifyDependencyCounterValues(TimestampPacketContainer *timestampPacketContainer, uint32_t expectedValue) {
|
||||
auto &nodes = timestampPacketContainer->peekNodes();
|
||||
EXPECT_NE(0u, nodes.size());
|
||||
for (auto &node : nodes) {
|
||||
EXPECT_EQ(expectedValue, node->getImplicitCpuDependenciesCount());
|
||||
}
|
||||
}
|
||||
|
||||
ExecutionEnvironment *executionEnvironment;
|
||||
std::unique_ptr<MockClDevice> device;
|
||||
MockContext *context;
|
||||
|
||||
@@ -493,6 +493,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenMapAllocationWhenEnqueueingReadOrWriteBu
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
|
||||
@@ -511,6 +512,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||
|
||||
uint32_t semaphoresCount = 0;
|
||||
uint32_t miAtomicsCount = 0;
|
||||
for (auto &cmd : hwParser.cmdList) {
|
||||
if (auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd)) {
|
||||
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
|
||||
@@ -519,14 +521,24 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenWriteBufferEnqueueWhenProgrammingCommand
|
||||
semaphoresCount++;
|
||||
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
|
||||
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(0u, miAtomicsCount);
|
||||
|
||||
} else if (auto miAtomicCmd = genCmdCast<MI_ATOMIC *>(cmd)) {
|
||||
miAtomicsCount++;
|
||||
auto dataAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
|
||||
EXPECT_EQ(dataAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomicCmd));
|
||||
EXPECT_EQ(1u, semaphoresCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, semaphoresCount);
|
||||
EXPECT_EQ(1u, miAtomicsCount);
|
||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandStreamThenAddSemaphoreWait) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
|
||||
auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
|
||||
|
||||
@@ -545,6 +557,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||
hwParser.parseCommands<FamilyType>(*cmdQ->peekCommandStream());
|
||||
|
||||
uint32_t semaphoresCount = 0;
|
||||
uint32_t miAtomicsCount = 0;
|
||||
for (auto &cmd : hwParser.cmdList) {
|
||||
if (auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(cmd)) {
|
||||
if (UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*semaphoreCmd)) {
|
||||
@@ -553,15 +566,25 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenReadBufferEnqueueWhenProgrammingCommandS
|
||||
semaphoresCount++;
|
||||
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode);
|
||||
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(0u, miAtomicsCount);
|
||||
|
||||
} else if (auto miAtomicCmd = genCmdCast<MI_ATOMIC *>(cmd)) {
|
||||
miAtomicsCount++;
|
||||
auto dataAddress = TimestampPacketHelper::getGpuDependenciesCountGpuAddress(*timestampPacketNode);
|
||||
EXPECT_EQ(MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT, miAtomicCmd->getAtomicOpcode());
|
||||
EXPECT_EQ(dataAddress, UnitTestHelper<FamilyType>::getAtomicMemoryAddress(*miAtomicCmd));
|
||||
EXPECT_EQ(1u, semaphoresCount);
|
||||
}
|
||||
}
|
||||
EXPECT_EQ(1u, semaphoresCount);
|
||||
EXPECT_EQ(1u, miAtomicsCount);
|
||||
EXPECT_EQ(initialTaskCount + 1, queueCsr->peekTaskCount());
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw<FamilyType> &commandQueue) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_ATOMIC = typename FamilyType::MI_ATOMIC;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
bool isCacheFlushForBcsRequired = commandQueue.isCacheFlushForBcsRequired();
|
||||
|
||||
@@ -592,6 +592,7 @@ TEST_F(PerformanceCountersMetricsLibraryTest, WhenGettingHwPerfCounterThenValidP
|
||||
ASSERT_NE(nullptr, perfCounter);
|
||||
|
||||
ASSERT_EQ(0ULL, perfCounter->tagForCpuAccess->report[0]);
|
||||
EXPECT_TRUE(perfCounter->isCompleted());
|
||||
|
||||
auto perfCounter2 = event->getHwPerfCounterNode();
|
||||
ASSERT_EQ(perfCounter, perfCounter2);
|
||||
|
||||
@@ -172,6 +172,7 @@ AllocateSharedAllocationsWithCpuAndGpuStorage = -1
|
||||
UseMaxSimdSizeToDeduceMaxWorkgroupSize = 0
|
||||
ReturnRawGpuTimestamps = 0
|
||||
ForcePerDssBackedBufferProgramming = 0
|
||||
DisableAtomicForPostSyncs = 0
|
||||
MaxHwThreadsPercent = 0
|
||||
MinHwThreadsUnoccupied = 0
|
||||
LimitBlitterMaxWidth = -1
|
||||
|
||||
@@ -34,10 +34,13 @@ struct TagAllocatorTest : public Test<MemoryAllocatorFixture> {
|
||||
assignDataToAllTimestamps(i, zeros);
|
||||
}
|
||||
setPacketsUsed(packetsUsed);
|
||||
|
||||
EXPECT_TRUE(isCompleted());
|
||||
}
|
||||
|
||||
void setToNonReadyState() {
|
||||
packets[0].contextEnd = 1;
|
||||
EXPECT_FALSE(isCompleted());
|
||||
}
|
||||
};
|
||||
|
||||
@@ -96,7 +99,6 @@ class MockTagAllocator : public TagAllocator<TagType> {
|
||||
using BaseClass::gfxAllocations;
|
||||
using BaseClass::populateFreeTags;
|
||||
using BaseClass::releaseDeferredTags;
|
||||
using BaseClass::returnTagToDeferredPool;
|
||||
using BaseClass::rootDeviceIndices;
|
||||
using BaseClass::TagAllocator;
|
||||
using BaseClass::usedTags;
|
||||
@@ -384,15 +386,15 @@ TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhen
|
||||
EXPECT_EQ(nullptr, tagAllocator.getUsedTagsHead());
|
||||
}
|
||||
|
||||
TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToFreeList) {
|
||||
TEST_F(TagAllocatorTest, givenNotReadyTagWhenReturnedThenMoveToDeferredList) {
|
||||
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 1, 1, deviceBitfield);
|
||||
auto node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
|
||||
|
||||
node->tagForCpuAccess->setToNonReadyState();
|
||||
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
tagAllocator.returnTag(node);
|
||||
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
|
||||
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
|
||||
}
|
||||
|
||||
TEST_F(TagAllocatorTest, givenTagNodeWhenCompletionCheckIsDisabledThenStatusIsMarkedAsNotReady) {
|
||||
@@ -441,13 +443,40 @@ TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDe
|
||||
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 1, 1, deviceBitfield);
|
||||
auto node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
|
||||
|
||||
tagAllocator.returnTagToDeferredPool(node);
|
||||
node->tagForCpuAccess->setToNonReadyState();
|
||||
tagAllocator.returnTag(node);
|
||||
node->tagForCpuAccess->setToNonReadyState();
|
||||
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
|
||||
node = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
|
||||
EXPECT_NE(nullptr, node);
|
||||
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty()); // empty again - new pool wasnt allocated
|
||||
}
|
||||
|
||||
TEST_F(TagAllocatorTest, givenTagsOnDeferredListWhenReleasingItThenMoveReadyTagsToFreePool) {
|
||||
MockTagAllocator<MockTimestampPackets32> tagAllocator(memoryManager, 2, 1, deviceBitfield); // pool with 2 tags
|
||||
auto node1 = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
|
||||
auto node2 = static_cast<TagNode<MockTimestampPackets32> *>(tagAllocator.getTag());
|
||||
|
||||
node1->tagForCpuAccess->setToNonReadyState();
|
||||
node2->tagForCpuAccess->setToNonReadyState();
|
||||
tagAllocator.returnTag(node1);
|
||||
tagAllocator.returnTag(node2);
|
||||
|
||||
tagAllocator.releaseDeferredTags();
|
||||
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
EXPECT_TRUE(tagAllocator.freeTags.peekIsEmpty());
|
||||
|
||||
node1->tagForCpuAccess->setTagToReadyState();
|
||||
tagAllocator.releaseDeferredTags();
|
||||
EXPECT_FALSE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
|
||||
|
||||
node2->tagForCpuAccess->setTagToReadyState();
|
||||
tagAllocator.releaseDeferredTags();
|
||||
EXPECT_TRUE(tagAllocator.deferredTags.peekIsEmpty());
|
||||
EXPECT_FALSE(tagAllocator.freeTags.peekIsEmpty());
|
||||
}
|
||||
|
||||
TEST_F(TagAllocatorTest, givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) {
|
||||
MockTagAllocator<TimestampPackets<uint32_t>> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);
|
||||
MockTagAllocator<HwTimeStamps> hwTimeStampsAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwTimeStamps), false, mockDeviceBitfield);
|
||||
@@ -542,6 +571,7 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
|
||||
EXPECT_ANY_THROW(perfCounterNode.getContextStartOffset());
|
||||
EXPECT_ANY_THROW(perfCounterNode.getContextEndOffset());
|
||||
EXPECT_ANY_THROW(perfCounterNode.getGlobalEndOffset());
|
||||
EXPECT_ANY_THROW(perfCounterNode.getImplicitGpuDependenciesCountOffset());
|
||||
EXPECT_ANY_THROW(perfCounterNode.getContextStartValue(0));
|
||||
EXPECT_ANY_THROW(perfCounterNode.getGlobalStartValue(0));
|
||||
EXPECT_ANY_THROW(perfCounterNode.getContextEndValue(0));
|
||||
@@ -550,8 +580,10 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
|
||||
EXPECT_ANY_THROW(perfCounterNode.getGlobalEndRef());
|
||||
EXPECT_ANY_THROW(perfCounterNode.setPacketsUsed(0));
|
||||
EXPECT_ANY_THROW(perfCounterNode.getPacketsUsed());
|
||||
EXPECT_EQ(0u, perfCounterNode.getImplicitGpuDependenciesCount());
|
||||
EXPECT_ANY_THROW(perfCounterNode.getSinglePacketSize());
|
||||
EXPECT_ANY_THROW(perfCounterNode.assignDataToAllTimestamps(0, nullptr));
|
||||
EXPECT_TRUE(perfCounterNode.isCompleted());
|
||||
}
|
||||
|
||||
{
|
||||
@@ -561,10 +593,13 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getContextStartOffset());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getContextEndOffset());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getGlobalEndOffset());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getImplicitGpuDependenciesCountOffset());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.setPacketsUsed(0));
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getPacketsUsed());
|
||||
EXPECT_EQ(0u, hwTimestampNode.getImplicitGpuDependenciesCount());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getSinglePacketSize());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.assignDataToAllTimestamps(0, nullptr));
|
||||
EXPECT_TRUE(hwTimestampNode.isCompleted());
|
||||
EXPECT_ANY_THROW(hwTimestampNode.getQueryHandleRef());
|
||||
}
|
||||
|
||||
|
||||
@@ -316,7 +316,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
auto commandStreamStartCSR = commandStreamCSR.getUsed();
|
||||
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStreamCSR, dispatchFlags.csrDependencies);
|
||||
|
||||
if (stallingPipeControlOnNextFlushRequired) {
|
||||
@@ -1011,7 +1011,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
|
||||
programEnginePrologue(commandStream);
|
||||
|
||||
for (auto &blitProperties : blitPropertiesContainer) {
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
TimestampPacketHelper::programCsrDependenciesForTimestampPacketContainer<GfxFamily>(commandStream, blitProperties.csrDependencies, getOsContext().getNumSupportedDevices());
|
||||
TimestampPacketHelper::programCsrDependenciesForForTaskCountContainer<GfxFamily>(commandStream, blitProperties.csrDependencies);
|
||||
|
||||
if (blitProperties.outputTimestampPacket && profilingEnabled) {
|
||||
|
||||
@@ -265,6 +265,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceHostPointerImport, -1, "-1: default, 0: dis
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU tiemstamps instead of calculated ones.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForcePerDssBackedBufferProgramming, false, "Always program per-DSS memory backed buffer in preamble")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableAtomicForPostSyncs, false, "When enabled, post syncs are not tracked with atomics")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseCommandBufferHeaderSizeForWddmQueueSubmission, true, "0: Page size (4096), 1: sizeof(COMMAND_BUFFER_HEADER)")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DisableDeepBind, false, "Disable passing RTLD_DEEPBIND flag to all dlopen calls.")
|
||||
DECLARE_DEBUG_VARIABLE(bool, UseUmKmDataTranslator, false, "Use helper library for UMD<->KMD (WDDM) struct layout compatibility")
|
||||
|
||||
@@ -102,6 +102,7 @@ set(NEO_CORE_HELPERS
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_offsets.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}/timestamp_packet_extra.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/uint16_avx2.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/uint16_sse4.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/vec.h
|
||||
|
||||
@@ -31,7 +31,7 @@ void TimestampPacketContainer::resolveDependencies(bool clearAllDependencies) {
|
||||
std::vector<TagNodeBase *> pendingNodes;
|
||||
|
||||
for (auto node : timestampPacketNodes) {
|
||||
if (clearAllDependencies) {
|
||||
if (node->canBeReleased() || clearAllDependencies) {
|
||||
node->returnTag();
|
||||
} else {
|
||||
pendingNodes.push_back(node);
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
|
||||
#include "pipe_control_args.h"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
@@ -48,6 +49,20 @@ class TimestampPackets : public TagTypeBase {
|
||||
|
||||
static constexpr size_t getSinglePacketSize() { return sizeof(Packet); }
|
||||
|
||||
bool isCompleted() const {
|
||||
if (DebugManager.flags.DisableAtomicForPostSyncs.get()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < packetsUsed; i++) {
|
||||
if (packets[i].contextEnd == 1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void initialize() {
|
||||
for (auto &packet : packets) {
|
||||
packet.contextStart = 1u;
|
||||
@@ -56,6 +71,7 @@ class TimestampPackets : public TagTypeBase {
|
||||
packet.globalEnd = 1u;
|
||||
}
|
||||
packetsUsed = 1;
|
||||
implicitGpuDependenciesCount = 0;
|
||||
}
|
||||
|
||||
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) {
|
||||
@@ -66,6 +82,7 @@ class TimestampPackets : public TagTypeBase {
|
||||
static constexpr size_t getContextStartOffset() { return offsetof(Packet, contextStart); }
|
||||
static constexpr size_t getContextEndOffset() { return offsetof(Packet, contextEnd); }
|
||||
static constexpr size_t getGlobalEndOffset() { return offsetof(Packet, globalEnd); }
|
||||
size_t getImplicitGpuDependenciesCountOffset() const { return ptrDiff(&implicitGpuDependenciesCount, this); }
|
||||
|
||||
uint64_t getContextStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].contextStart); }
|
||||
uint64_t getGlobalStartValue(uint32_t packetIndex) const { return static_cast<uint64_t>(packets[packetIndex].globalStart); }
|
||||
@@ -75,13 +92,16 @@ class TimestampPackets : public TagTypeBase {
|
||||
void setPacketsUsed(uint32_t used) { packetsUsed = used; }
|
||||
uint32_t getPacketsUsed() const { return packetsUsed; }
|
||||
|
||||
uint32_t getImplicitGpuDependenciesCount() const { return implicitGpuDependenciesCount; }
|
||||
|
||||
protected:
|
||||
Packet packets[TimestampPacketSizeControl::preferredPacketCount];
|
||||
uint32_t implicitGpuDependenciesCount = 0;
|
||||
uint32_t packetsUsed = 1;
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 1) * sizeof(uint32_t)) == sizeof(TimestampPackets<uint32_t>),
|
||||
static_assert(((4 * TimestampPacketSizeControl::preferredPacketCount + 2) * sizeof(uint32_t)) == sizeof(TimestampPackets<uint32_t>),
|
||||
"This structure is consumed by GPU and has to follow specific restrictions for padding and size");
|
||||
|
||||
class TimestampPacketContainer : public NonCopyableClass {
|
||||
@@ -124,24 +144,49 @@ struct TimestampPacketHelper {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getGlobalStartOffset();
|
||||
}
|
||||
|
||||
static uint64_t getGpuDependenciesCountGpuAddress(const TagNodeBase &timestampPacketNode) {
|
||||
return timestampPacketNode.getGpuAddress() + timestampPacketNode.getImplicitGpuDependenciesCountOffset();
|
||||
}
|
||||
|
||||
static void overrideSupportedDevicesCount(uint32_t &numSupportedDevices);
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programSemaphore(LinearStream &cmdStream, TagNodeBase &timestampPacketNode) {
|
||||
static void programSemaphoreWithImplicitDependency(LinearStream &cmdStream, TagNodeBase &timestampPacketNode, uint32_t numSupportedDevices) {
|
||||
using MI_ATOMIC = typename GfxFamily::MI_ATOMIC;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
|
||||
auto compareAddress = getContextEndGpuAddress(timestampPacketNode);
|
||||
auto dependenciesCountAddress = getGpuDependenciesCountGpuAddress(timestampPacketNode);
|
||||
|
||||
for (uint32_t packetId = 0; packetId < timestampPacketNode.getPacketsUsed(); packetId++) {
|
||||
uint64_t compareOffset = packetId * timestampPacketNode.getSinglePacketSize();
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(cmdStream, compareAddress + compareOffset, 1, COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
}
|
||||
|
||||
bool trackPostSyncDependencies = true;
|
||||
if (DebugManager.flags.DisableAtomicForPostSyncs.get()) {
|
||||
trackPostSyncDependencies = false;
|
||||
}
|
||||
|
||||
if (trackPostSyncDependencies) {
|
||||
overrideSupportedDevicesCount(numSupportedDevices);
|
||||
|
||||
for (uint32_t i = 0; i < numSupportedDevices; i++) {
|
||||
timestampPacketNode.incImplicitCpuDependenciesCount();
|
||||
}
|
||||
EncodeAtomic<GfxFamily>::programMiAtomic(cmdStream, dependenciesCountAddress,
|
||||
MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT,
|
||||
MI_ATOMIC::DATA_SIZE::DATA_SIZE_DWORD,
|
||||
0u, 0u, 0x0u, 0x0u);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies) {
|
||||
static void programCsrDependenciesForTimestampPacketContainer(LinearStream &cmdStream, const CsrDependencies &csrDependencies, uint32_t numSupportedDevices) {
|
||||
for (auto timestampPacketContainer : csrDependencies.timestampPacketContainer) {
|
||||
for (auto &node : timestampPacketContainer->peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphore<GfxFamily>(cmdStream, *node);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node, numSupportedDevices);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -162,9 +207,9 @@ struct TimestampPacketHelper {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, AuxTranslationDirection auxTranslationDirection>
|
||||
static void programSemaphoreForAuxTranslation(LinearStream &cmdStream,
|
||||
static void programSemaphoreWithImplicitDependencyForAuxTranslation(LinearStream &cmdStream,
|
||||
const TimestampPacketDependencies *timestampPacketDependencies,
|
||||
const HardwareInfo &hwInfo) {
|
||||
const HardwareInfo &hwInfo, uint32_t numSupportedDevices) {
|
||||
auto &container = (auxTranslationDirection == AuxTranslationDirection::AuxToNonAux)
|
||||
? timestampPacketDependencies->auxToNonAuxNodes
|
||||
: timestampPacketDependencies->nonAuxToAuxNodes;
|
||||
@@ -181,7 +226,7 @@ struct TimestampPacketHelper {
|
||||
}
|
||||
|
||||
for (auto &node : container.peekNodes()) {
|
||||
TimestampPacketHelper::programSemaphore<GfxFamily>(cmdStream, *node);
|
||||
TimestampPacketHelper::programSemaphoreWithImplicitDependency<GfxFamily>(cmdStream, *node, numSupportedDevices);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -198,12 +243,14 @@ struct TimestampPacketHelper {
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue() {
|
||||
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
return sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
static size_t getRequiredCmdStreamSizeForNodeDependency(TagNodeBase &timestampPacketNode) {
|
||||
return (timestampPacketNode.getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT));
|
||||
size_t totalMiSemaphoreWaitSize = timestampPacketNode.getPacketsUsed() * sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
|
||||
|
||||
return totalMiSemaphoreWaitSize + sizeof(typename GfxFamily::MI_ATOMIC);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
13
shared/source/helpers/timestamp_packet_extra.cpp
Normal file
13
shared/source/helpers/timestamp_packet_extra.cpp
Normal file
@@ -0,0 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/timestamp_packet.h"
|
||||
|
||||
namespace NEO {
|
||||
void TimestampPacketHelper::overrideSupportedDevicesCount(uint32_t &numSupportedDevices) {
|
||||
}
|
||||
} // namespace NEO
|
||||
@@ -40,7 +40,9 @@ void TagNodeBase::returnTag() {
|
||||
}
|
||||
|
||||
bool TagNodeBase::canBeReleased() const {
|
||||
return !doNotReleaseNodes;
|
||||
return (!doNotReleaseNodes) &&
|
||||
(isCompleted()) &&
|
||||
(getImplicitGpuDependenciesCount() == getImplicitCpuDependenciesCount());
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -54,15 +54,21 @@ class TagNodeBase : public NonCopyableOrMovableClass {
|
||||
|
||||
bool isProfilingCapable() const { return profilingCapable; }
|
||||
|
||||
void incImplicitCpuDependenciesCount() { implicitCpuDependenciesCount++; }
|
||||
|
||||
uint32_t getImplicitCpuDependenciesCount() const { return implicitCpuDependenciesCount.load(); }
|
||||
|
||||
const TagAllocatorBase *getAllocator() const { return allocator; }
|
||||
|
||||
// TagType specific calls
|
||||
virtual bool isCompleted() const = 0;
|
||||
virtual void assignDataToAllTimestamps(uint32_t packetIndex, void *source) = 0;
|
||||
|
||||
virtual size_t getGlobalStartOffset() const = 0;
|
||||
virtual size_t getContextStartOffset() const = 0;
|
||||
virtual size_t getContextEndOffset() const = 0;
|
||||
virtual size_t getGlobalEndOffset() const = 0;
|
||||
virtual size_t getImplicitGpuDependenciesCountOffset() const = 0;
|
||||
|
||||
virtual uint64_t getContextStartValue(uint32_t packetIndex) const = 0;
|
||||
virtual uint64_t getGlobalStartValue(uint32_t packetIndex) const = 0;
|
||||
@@ -77,6 +83,8 @@ class TagNodeBase : public NonCopyableOrMovableClass {
|
||||
|
||||
virtual size_t getSinglePacketSize() const = 0;
|
||||
|
||||
virtual uint32_t getImplicitGpuDependenciesCount() const = 0;
|
||||
|
||||
virtual MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const = 0;
|
||||
|
||||
protected:
|
||||
@@ -87,6 +95,7 @@ class TagNodeBase : public NonCopyableOrMovableClass {
|
||||
MultiGraphicsAllocation *gfxAllocation = nullptr;
|
||||
uint64_t gpuAddress = 0;
|
||||
std::atomic<uint32_t> refCount{0};
|
||||
std::atomic<uint32_t> implicitCpuDependenciesCount{0};
|
||||
bool doNotReleaseNodes = false;
|
||||
bool profilingCapable = true;
|
||||
|
||||
@@ -104,6 +113,7 @@ class TagNode : public TagNodeBase, public IDNode<TagNode<TagType>> {
|
||||
|
||||
void initialize() override {
|
||||
tagForCpuAccess->initialize();
|
||||
implicitCpuDependenciesCount.store(0);
|
||||
setProfilingCapable(true);
|
||||
}
|
||||
|
||||
@@ -111,10 +121,13 @@ class TagNode : public TagNodeBase, public IDNode<TagNode<TagType>> {
|
||||
|
||||
void assignDataToAllTimestamps(uint32_t packetIndex, void *source) override;
|
||||
|
||||
bool isCompleted() const override;
|
||||
|
||||
size_t getGlobalStartOffset() const override;
|
||||
size_t getContextStartOffset() const override;
|
||||
size_t getContextEndOffset() const override;
|
||||
size_t getGlobalEndOffset() const override;
|
||||
size_t getImplicitGpuDependenciesCountOffset() const override;
|
||||
|
||||
uint64_t getContextStartValue(uint32_t packetIndex) const override;
|
||||
uint64_t getGlobalStartValue(uint32_t packetIndex) const override;
|
||||
@@ -129,6 +142,8 @@ class TagNode : public TagNodeBase, public IDNode<TagNode<TagType>> {
|
||||
|
||||
size_t getSinglePacketSize() const override;
|
||||
|
||||
uint32_t getImplicitGpuDependenciesCount() const override;
|
||||
|
||||
MetricsLibraryApi::QueryHandle_1_0 &getQueryHandleRef() const override;
|
||||
};
|
||||
|
||||
|
||||
@@ -164,6 +164,15 @@ size_t TagNode<TagType>::getGlobalEndOffset() const {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getImplicitGpuDependenciesCountOffset() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getImplicitGpuDependenciesCountOffset();
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint64_t TagNode<TagType>::getContextStartValue(uint32_t packetIndex) const {
|
||||
if constexpr (TagType::getTagNodeType() != TagNodeType::HwPerfCounter) {
|
||||
@@ -241,6 +250,15 @@ uint32_t TagNode<TagType>::getPacketsUsed() const {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
uint32_t TagNode<TagType>::getImplicitGpuDependenciesCount() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->getImplicitGpuDependenciesCount();
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
size_t TagNode<TagType>::getSinglePacketSize() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
@@ -261,6 +279,15 @@ void TagNode<TagType>::assignDataToAllTimestamps(uint32_t packetIndex, void *sou
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
bool TagNode<TagType>::isCompleted() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::TimestampPacket) {
|
||||
return tagForCpuAccess->isCompleted();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename TagType>
|
||||
MetricsLibraryApi::QueryHandle_1_0 &TagNode<TagType>::getQueryHandleRef() const {
|
||||
if constexpr (TagType::getTagNodeType() == TagNodeType::HwPerfCounter) {
|
||||
|
||||
Reference in New Issue
Block a user