feature: don't initialize in-order TS nodes

Related-To: NEO-11925

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-07-22 15:46:14 +00:00
committed by Compute-Runtime-Automation
parent 8f4472a26c
commit c3312f21f7
9 changed files with 85 additions and 20 deletions

View File

@@ -934,7 +934,7 @@ TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
if (profilingTimeStampAllocator.get() == nullptr) {
RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
sizeof(HwTimeStamps), false, true, osContext->getDeviceBitfield());
}
return profilingTimeStampAllocator.get();
}
@@ -943,7 +943,7 @@ TagAllocatorBase *CommandStreamReceiver::getEventPerfCountAllocator(const uint32
if (perfCounterAllocator.get() == nullptr) {
RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, osContext->getDeviceBitfield());
rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, true, osContext->getDeviceBitfield());
}
return perfCounterAllocator.get();
}

View File

@@ -457,10 +457,10 @@ std::unique_ptr<TagAllocatorBase> GfxCoreHelperHw<GfxFamily>::createTimestampPac
if (debugManager.flags.OverrideTimestampPacketSize.get() != -1) {
if (debugManager.flags.OverrideTimestampPacketSize.get() == 4) {
using TimestampPackets32T = TimestampPackets<uint32_t, GfxFamily::timestampPacketCount>;
return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, deviceBitfield);
return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, true, deviceBitfield);
} else if (debugManager.flags.OverrideTimestampPacketSize.get() == 8) {
using TimestampPackets64T = TimestampPackets<uint64_t, GfxFamily::timestampPacketCount>;
return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, deviceBitfield);
return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, true, deviceBitfield);
} else {
UNRECOVERABLE_IF(true);
}
@@ -469,7 +469,7 @@ std::unique_ptr<TagAllocatorBase> GfxCoreHelperHw<GfxFamily>::createTimestampPac
using TimestampPacketType = typename GfxFamily::TimestampPacketType;
using TimestampPacketsT = TimestampPackets<TimestampPacketType, GfxFamily::timestampPacketCount>;
return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, deviceBitfield);
return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, true, deviceBitfield);
}
template <typename GfxFamily>

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -177,8 +177,7 @@ class TagAllocator : public TagAllocatorBase {
using NodeType = TagNode<TagType>;
TagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memMngr, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
DeviceBitfield deviceBitfield);
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, bool initializeTags, DeviceBitfield deviceBitfield);
TagNodeBase *getTag() override;
@@ -200,6 +199,8 @@ class TagAllocator : public TagAllocatorBase {
IDList<NodeType> deferredTags;
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
bool initializeTags = true;
};
} // namespace NEO

View File

@@ -13,8 +13,8 @@
namespace NEO {
template <typename TagType>
TagAllocator<TagType>::TagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment,
size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
: TagAllocatorBase(rootDeviceIndices, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
size_t tagSize, bool doNotReleaseNodes, bool initializeTags, DeviceBitfield deviceBitfield)
: TagAllocatorBase(rootDeviceIndices, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield), initializeTags(initializeTags) {
populateFreeTags();
}
@@ -32,10 +32,13 @@ TagNodeBase *TagAllocator<TagType>::getTag() {
}
usedTags.pushFrontOne(*node);
node->incRefCount();
node->initialize();
if (initializeTags) {
node->initialize();
}
if (debugManager.flags.PrintTimestampPacketUsage.get() == 1) {
printf("\nPID: %u, TSP taken from pool and initialized: 0x%" PRIX64, SysCalls::getProcessId(), node->getGpuAddress());
printf("\nPID: %u, TSP taken from pool and initialized(%d): 0x%" PRIX64, SysCalls::getProcessId(), initializeTags, node->getGpuAddress());
}
return node;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,7 +21,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
: BaseClass(RootDeviceIndicesContainer({rootDeviceIndex}), memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
: BaseClass(RootDeviceIndicesContainer({rootDeviceIndex}), memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, true, deviceBitfield) {
}
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount = 10)
@@ -29,7 +29,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
}
MockTagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memoryManager, size_t tagCount = 10)
: BaseClass(rootDeviceIndices, memoryManager, tagCount, MemoryConstants::cacheLineSize, sizeof(TagType), false, mockDeviceBitfield) {}
: BaseClass(rootDeviceIndices, memoryManager, tagCount, MemoryConstants::cacheLineSize, sizeof(TagType), false, true, mockDeviceBitfield) {}
void returnTag(TagNodeBase *node) override {
releaseReferenceNodes.push_back(static_cast<NodeType *>(node));

View File

@@ -37,6 +37,7 @@ struct TagAllocatorTest : public Test<MemoryAllocatorFixture> {
struct TimeStamps {
void initialize() {
initializeCount++;
start = 1;
end = 2;
}
@@ -69,6 +70,8 @@ struct TimeStamps {
uint64_t contextCompleteTS;
uint64_t globalEndTS;
uint32_t initializeCount = 0;
};
template <typename TagType>
@@ -91,7 +94,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount,
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
: BaseClass(RootDeviceIndicesContainer{rootDeviceIndex}, memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
: BaseClass(RootDeviceIndicesContainer{rootDeviceIndex}, memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, true, deviceBitfield) {
}
MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, bool disableCompletionCheck, DeviceBitfield deviceBitfield)
@@ -353,6 +356,27 @@ TEST_F(TagAllocatorTest, whenNewTagIsTakenThenItIsInitialized) {
EXPECT_TRUE(node->isProfilingCapable());
}
// Checks that the initializeTags constructor argument decides whether getTag() calls
// initialize() on the tag it hands out: with `true` the tag's initializeCount grows on
// every acquisition, with `false` it stays untouched, both on the first getTag() and on
// a getTag() issued after the node was given back via returnTag().
TEST_F(TagAllocatorTest, givenReinitializationDisabledWhenGettingNewTagThenDontInitialize) {
    using TimeStampsNode = TagNode<TimeStamps>;
    const auto acquire = [](auto &allocator) { return static_cast<TimeStampsNode *>(allocator.getTag()); };

    MockTagAllocator<TimeStamps> initializingAllocator(RootDeviceIndicesContainer{0}, memoryManager, 1, 2, sizeof(TimeStamps), false, true, deviceBitfield);
    MockTagAllocator<TimeStamps> nonInitializingAllocator(RootDeviceIndicesContainer{0}, memoryManager, 1, 2, sizeof(TimeStamps), false, false, deviceBitfield);

    // Reset the counters so the expectations below only reflect what happens from this point on.
    initializingAllocator.freeTags.peekHead()->tagForCpuAccess->initializeCount = 0;
    nonInitializingAllocator.freeTags.peekHead()->tagForCpuAccess->initializeCount = 0;

    // First acquisition: only the initializing allocator touches the tag.
    auto *initializedNode = acquire(initializingAllocator);
    auto *untouchedNode = acquire(nonInitializingAllocator);
    EXPECT_EQ(1u, initializedNode->tagForCpuAccess->initializeCount);
    EXPECT_EQ(0u, untouchedNode->tagForCpuAccess->initializeCount);

    initializedNode->returnTag();
    untouchedNode->returnTag();

    // Acquisition after returnTag(): the initializing allocator initializes again, the other still does not.
    initializedNode = acquire(initializingAllocator);
    untouchedNode = acquire(nonInitializingAllocator);
    EXPECT_EQ(2u, initializedNode->tagForCpuAccess->initializeCount);
    EXPECT_EQ(0u, untouchedNode->tagForCpuAccess->initializeCount);
}
TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhenAllRefCountsAreReleased) {
MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 2, 1, deviceBitfield);
@@ -475,7 +499,9 @@ TEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenPopulatingTagsThenCreateMul
const RootDeviceIndicesContainer indices = {0, 2, maxRootDeviceIndex};
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(indices, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false, mockDeviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(indices, testMemoryManager, 1, 1,
sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false,
true, mockDeviceBitfield);
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());
@@ -505,7 +531,8 @@ HWTEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenCallingMakeResidentThenUs
const RootDeviceIndicesContainer indicesVector = {0, 1};
MockTagAllocator<TimestampPackets<uint32_t, FamilyType::timestampPacketCount>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, FamilyType::timestampPacketCount>), false, mockDeviceBitfield);
MockTagAllocator<TimestampPackets<uint32_t, FamilyType::timestampPacketCount>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, FamilyType::timestampPacketCount>),
false, true, mockDeviceBitfield);
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());