performance(ocl): timestamp packet count per gfx

Add support for different timestamp packet counts per gfx family.
Change all packet counts to 1 except for xe-hpc.

Related-To: NEO-8154

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-09-25 07:45:36 +00:00
committed by Compute-Runtime-Automation
parent ebc024f0f1
commit eebf2bbd26
26 changed files with 114 additions and 102 deletions

View File

@@ -203,7 +203,7 @@ HWTEST_F(CommandStreamReceiverTest, whenRegisterClientThenIncrementClientNum) {
}
HWTEST_F(CommandStreamReceiverTest, WhenCreatingCsrThenTimestampTypeIs32b) {
using ExpectedType = TimestampPackets<typename FamilyType::TimestampPacketType>;
using ExpectedType = TimestampPackets<typename FamilyType::TimestampPacketType, FamilyType::timestampPacketCount>;
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();

View File

@@ -39,8 +39,8 @@ void setTagToReadyState(TagNodeBase *tagNode) {
} // namespace
struct TimestampPacketTests : public ::testing::Test {
struct MockTagNode : public TagNode<TimestampPackets<uint32_t>> {
using TagNode<TimestampPackets<uint32_t>>::gpuAddress;
struct MockTagNode : public TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> {
using TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>>::gpuAddress;
};
template <typename MI_SEMAPHORE_WAIT>
@@ -49,7 +49,7 @@ struct TimestampPacketTests : public ::testing::Test {
EXPECT_EQ(semaphoreCmd->getCompareOperation(), MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword());
uint64_t compareOffset = packetId * TimestampPackets<uint32_t>::getSinglePacketSize();
uint64_t compareOffset = packetId * TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>::getSinglePacketSize();
auto dataAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode) + compareOffset;
EXPECT_EQ(dataAddress, semaphoreCmd->getSemaphoreGraphicsAddress());
@@ -59,7 +59,7 @@ struct TimestampPacketTests : public ::testing::Test {
HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAddress) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
TimestampPackets<uint32_t> tag;
TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount> tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@@ -77,8 +77,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWhenSemaphoreIsProgrammedThenUseGpuAd
HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgrammedThenUseGpuAddress) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
TimestampPackets<uint32_t> tag;
TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount> tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@@ -98,7 +97,7 @@ HWTEST_F(TimestampPacketTests, givenTagNodeWithPacketsUsed2WhenSemaphoreIsProgra
}
TEST_F(TimestampPacketTests, givenTagNodeWhatAskingForGpuAddressesThenReturnCorrectValue) {
TimestampPackets<uint32_t> tag;
TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount> tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@@ -113,11 +112,11 @@ TEST_F(TimestampPacketTests, givenTimestampPacketContainerWhenMovedThenMoveAllNo
EXPECT_FALSE(std::is_copy_assignable<TimestampPacketContainer>::value);
EXPECT_FALSE(std::is_copy_constructible<TimestampPacketContainer>::value);
struct MockTagNode : public TagNode<TimestampPackets<uint32_t>> {
struct MockTagNode : public TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> {
void returnTag() override {
returnCalls++;
}
using TagNode<TimestampPackets<uint32_t>>::refCount;
using TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>>::refCount;
uint32_t returnCalls = 0;
};
@@ -143,7 +142,7 @@ TEST_F(TimestampPacketTests, givenTimestampPacketContainerWhenMovedThenMoveAllNo
}
TEST_F(TimestampPacketTests, givenTagNodesWhenReleaseIsCalledThenReturnAllTagsToPool) {
struct MockTagNode : public TagNode<TimestampPackets<uint32_t>> {
struct MockTagNode : public TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> {
void returnTag() override {
returnCalls++;
}
@@ -213,7 +212,7 @@ TEST_F(TimestampPacketTests, whenObjectIsCreatedThenInitializeAllStamps) {
}
HWTEST_F(TimestampPacketTests, whenEstimatingSizeForNodeDependencyThenReturnCorrectValue) {
TimestampPackets<uint32_t> tag;
TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount> tag;
MockTagNode mockNode;
mockNode.tagForCpuAccess = &tag;
mockNode.gpuAddress = 0x1230000;
@@ -331,6 +330,11 @@ HWTEST_F(DeviceTimestampPacketTests, givenDebugFlagSetWhenCreatingTimestampPacke
EXPECT_FALSE(tag->canBeReleased());
}
// Checks the size of TimestampPackets instantiated with the tested family's
// timestamp type and packet count: it must equal
// 4 * packet count * sizeof(timestamp type).
// NOTE(review): the literal 4 presumably matches the number of timestamp
// fields stored per packet - confirm against the TimestampPackets definition.
HWTEST_F(DeviceTimestampPacketTests, givenTimestampPacketTypeAndSizeWhenCheckingSizeOfTimestampPacketsThenItIsCorrect) {
    using PacketValueType = typename FamilyType::TimestampPacketType;
    using FamilyTimestampPackets = TimestampPackets<PacketValueType, FamilyType::timestampPacketCount>;

    const auto expectedSizeInBytes = (4 * FamilyType::timestampPacketCount) * sizeof(PacketValueType);

    EXPECT_EQ(expectedSizeInBytes, sizeof(FamilyTimestampPackets));
}
using TimestampPacketHelperTests = Test<DeviceFixture>;
HWTEST_F(TimestampPacketHelperTests, givenTagNodesInMultiRootSyncContainerWhenProgramingDependensiecThenSemaforesAreProgrammed) {
@@ -361,4 +365,4 @@ HWTEST_F(TimestampPacketHelperTests, givenEmptyMultiRootSyncContainerWhenProgram
CsrDependencies deps;
TimestampPacketHelper::programCsrDependenciesForForMultiRootDeviceSyncContainer<FamilyType>(cmdStream, deps);
EXPECT_EQ(cmdStream.getUsed(), 0u);
}
}

View File

@@ -222,7 +222,7 @@ TEST_F(TagAllocatorTest, givenInputTagCountWhenCreatingAllocatorThenRequestedNum
public:
using MockMemoryManager::MockMemoryManager;
GraphicsAllocation *allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) override {
return new MemoryAllocation(0, TimestampPackets<uint32_t>::getAllocationType(), nullptr, nullptr, 0, MemoryConstants::pageSize,
return new MemoryAllocation(0, TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>::getAllocationType(), nullptr, nullptr, 0, MemoryConstants::pageSize,
1, MemoryPool::System4KBPages, false, false, MemoryManager::maxOsContextCount);
}
};
@@ -230,7 +230,7 @@ TEST_F(TagAllocatorTest, givenInputTagCountWhenCreatingAllocatorThenRequestedNum
auto mockMemoryManager = std::make_unique<MyMockMemoryManager>(true, true, *executionEnvironment);
const size_t tagsCount = 3;
MockTagAllocator<TimestampPackets<uint32_t>> tagAllocator(mockMemoryManager.get(), tagsCount, 1, deviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> tagAllocator(mockMemoryManager.get(), tagsCount, 1, deviceBitfield);
size_t nodesFound = 0;
auto head = tagAllocator.freeTags.peekHead();
@@ -440,7 +440,7 @@ TEST_F(TagAllocatorTest, givenEmptyFreeListWhenAskingForNewTagThenTryToReleaseDe
}
TEST_F(TagAllocatorTest, givenTagAllocatorWhenGraphicsAllocationIsCreatedThenSetValidllocationType) {
MockTagAllocator<TimestampPackets<uint32_t>> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false, mockDeviceBitfield);
MockTagAllocator<HwTimeStamps> hwTimeStampsAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwTimeStamps), false, mockDeviceBitfield);
MockTagAllocator<HwPerfCounter> hwPerfCounterAllocator(mockRootDeviceIndex, memoryManager, 1, 1, sizeof(HwPerfCounter), false, mockDeviceBitfield);
@@ -468,7 +468,7 @@ TEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenPopulatingTagsThenCreateMul
const RootDeviceIndicesContainer indices = {0, 2, maxRootDeviceIndex};
MockTagAllocator<TimestampPackets<uint32_t>> timestampPacketAllocator(indices, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(indices, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false, mockDeviceBitfield);
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());
@@ -498,7 +498,7 @@ HWTEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenCallingMakeResidentThenUs
const RootDeviceIndicesContainer indicesVector = {0, 1};
MockTagAllocator<TimestampPackets<uint32_t>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t>), false, mockDeviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, FamilyType::timestampPacketCount>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, FamilyType::timestampPacketCount>), false, mockDeviceBitfield);
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());
@@ -557,7 +557,7 @@ TEST_F(TagAllocatorTest, givenNotSupportedTagTypeWhenCallingMethodThenAbortOrRet
}
{
TagNode<TimestampPackets<uint32_t>> timestampPacketsNode = {};
TagNode<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketsNode = {};
EXPECT_ANY_THROW(timestampPacketsNode.getContextCompleteRef());
EXPECT_ANY_THROW(timestampPacketsNode.getGlobalEndRef());