mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: don't initialize in-order TS nodes
Related-To: NEO-11925
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: 8f4472a26c
Commit: c3312f21f7
@@ -934,7 +934,7 @@ TagAllocatorBase *CommandStreamReceiver::getEventTsAllocator() {
|
||||
if (profilingTimeStampAllocator.get() == nullptr) {
|
||||
RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};
|
||||
profilingTimeStampAllocator = std::make_unique<TagAllocator<HwTimeStamps>>(rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize,
|
||||
sizeof(HwTimeStamps), false, osContext->getDeviceBitfield());
|
||||
sizeof(HwTimeStamps), false, true, osContext->getDeviceBitfield());
|
||||
}
|
||||
return profilingTimeStampAllocator.get();
|
||||
}
|
||||
@@ -943,7 +943,7 @@ TagAllocatorBase *CommandStreamReceiver::getEventPerfCountAllocator(const uint32
|
||||
if (perfCounterAllocator.get() == nullptr) {
|
||||
RootDeviceIndicesContainer rootDeviceIndices = {rootDeviceIndex};
|
||||
perfCounterAllocator = std::make_unique<TagAllocator<HwPerfCounter>>(
|
||||
rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, osContext->getDeviceBitfield());
|
||||
rootDeviceIndices, getMemoryManager(), getPreferredTagPoolSize(), MemoryConstants::cacheLineSize, tagSize, false, true, osContext->getDeviceBitfield());
|
||||
}
|
||||
return perfCounterAllocator.get();
|
||||
}
|
||||
|
||||
@@ -457,10 +457,10 @@ std::unique_ptr<TagAllocatorBase> GfxCoreHelperHw<GfxFamily>::createTimestampPac
|
||||
if (debugManager.flags.OverrideTimestampPacketSize.get() != -1) {
|
||||
if (debugManager.flags.OverrideTimestampPacketSize.get() == 4) {
|
||||
using TimestampPackets32T = TimestampPackets<uint32_t, GfxFamily::timestampPacketCount>;
|
||||
return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, deviceBitfield);
|
||||
return std::make_unique<TagAllocator<TimestampPackets32T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets32T), doNotReleaseNodes, true, deviceBitfield);
|
||||
} else if (debugManager.flags.OverrideTimestampPacketSize.get() == 8) {
|
||||
using TimestampPackets64T = TimestampPackets<uint64_t, GfxFamily::timestampPacketCount>;
|
||||
return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, deviceBitfield);
|
||||
return std::make_unique<TagAllocator<TimestampPackets64T>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPackets64T), doNotReleaseNodes, true, deviceBitfield);
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
@@ -469,7 +469,7 @@ std::unique_ptr<TagAllocatorBase> GfxCoreHelperHw<GfxFamily>::createTimestampPac
|
||||
using TimestampPacketType = typename GfxFamily::TimestampPacketType;
|
||||
using TimestampPacketsT = TimestampPackets<TimestampPacketType, GfxFamily::timestampPacketCount>;
|
||||
|
||||
return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, deviceBitfield);
|
||||
return std::make_unique<TagAllocator<TimestampPacketsT>>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, sizeof(TimestampPacketsT), doNotReleaseNodes, true, deviceBitfield);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -177,8 +177,7 @@ class TagAllocator : public TagAllocatorBase {
|
||||
using NodeType = TagNode<TagType>;
|
||||
|
||||
TagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memMngr, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes,
|
||||
DeviceBitfield deviceBitfield);
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, bool initializeTags, DeviceBitfield deviceBitfield);
|
||||
|
||||
TagNodeBase *getTag() override;
|
||||
|
||||
@@ -200,6 +199,8 @@ class TagAllocator : public TagAllocatorBase {
|
||||
IDList<NodeType> deferredTags;
|
||||
|
||||
std::vector<std::unique_ptr<NodeType[]>> tagPoolMemory;
|
||||
|
||||
bool initializeTags = true;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
namespace NEO {
|
||||
template <typename TagType>
|
||||
TagAllocator<TagType>::TagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memMngr, size_t tagCount, size_t tagAlignment,
|
||||
size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
|
||||
: TagAllocatorBase(rootDeviceIndices, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
|
||||
size_t tagSize, bool doNotReleaseNodes, bool initializeTags, DeviceBitfield deviceBitfield)
|
||||
: TagAllocatorBase(rootDeviceIndices, memMngr, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield), initializeTags(initializeTags) {
|
||||
|
||||
populateFreeTags();
|
||||
}
|
||||
@@ -32,10 +32,13 @@ TagNodeBase *TagAllocator<TagType>::getTag() {
|
||||
}
|
||||
usedTags.pushFrontOne(*node);
|
||||
node->incRefCount();
|
||||
node->initialize();
|
||||
|
||||
if (initializeTags) {
|
||||
node->initialize();
|
||||
}
|
||||
|
||||
if (debugManager.flags.PrintTimestampPacketUsage.get() == 1) {
|
||||
printf("\nPID: %u, TSP taken from pool and initialized: 0x%" PRIX64, SysCalls::getProcessId(), node->getGpuAddress());
|
||||
printf("\nPID: %u, TSP taken from pool and initialized(%d): 0x%" PRIX64, SysCalls::getProcessId(), initializeTags, node->getGpuAddress());
|
||||
}
|
||||
|
||||
return node;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,7 +21,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
|
||||
|
||||
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
|
||||
: BaseClass(RootDeviceIndicesContainer({rootDeviceIndex}), memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
|
||||
: BaseClass(RootDeviceIndicesContainer({rootDeviceIndex}), memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, true, deviceBitfield) {
|
||||
}
|
||||
|
||||
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount = 10)
|
||||
@@ -29,7 +29,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
|
||||
}
|
||||
|
||||
MockTagAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, MemoryManager *memoryManager, size_t tagCount = 10)
|
||||
: BaseClass(rootDeviceIndices, memoryManager, tagCount, MemoryConstants::cacheLineSize, sizeof(TagType), false, mockDeviceBitfield) {}
|
||||
: BaseClass(rootDeviceIndices, memoryManager, tagCount, MemoryConstants::cacheLineSize, sizeof(TagType), false, true, mockDeviceBitfield) {}
|
||||
|
||||
void returnTag(TagNodeBase *node) override {
|
||||
releaseReferenceNodes.push_back(static_cast<NodeType *>(node));
|
||||
|
||||
@@ -37,6 +37,7 @@ struct TagAllocatorTest : public Test<MemoryAllocatorFixture> {
|
||||
|
||||
struct TimeStamps {
|
||||
// Test-only tag initializer: records that it ran and writes fixed marker
// values into the start/end fields.
void initialize() {
|
||||
// Per-tag call counter; lets tests observe how many times this tag was initialized.
initializeCount++;
|
||||
start = 1;
|
||||
end = 2;
|
||||
}
|
||||
@@ -69,6 +70,8 @@ struct TimeStamps {
|
||||
|
||||
uint64_t contextCompleteTS;
|
||||
uint64_t globalEndTS;
|
||||
|
||||
uint32_t initializeCount = 0;
|
||||
};
|
||||
|
||||
template <typename TagType>
|
||||
@@ -91,7 +94,7 @@ class MockTagAllocator : public TagAllocator<TagType> {
|
||||
|
||||
MockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, size_t tagCount,
|
||||
size_t tagAlignment, size_t tagSize, bool doNotReleaseNodes, DeviceBitfield deviceBitfield)
|
||||
: BaseClass(RootDeviceIndicesContainer{rootDeviceIndex}, memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, deviceBitfield) {
|
||||
: BaseClass(RootDeviceIndicesContainer{rootDeviceIndex}, memoryManager, tagCount, tagAlignment, tagSize, doNotReleaseNodes, true, deviceBitfield) {
|
||||
}
|
||||
|
||||
MockTagAllocator(MemoryManager *memMngr, size_t tagCount, size_t tagAlignment, bool disableCompletionCheck, DeviceBitfield deviceBitfield)
|
||||
@@ -353,6 +356,27 @@ TEST_F(TagAllocatorTest, whenNewTagIsTakenThenItIsInitialized) {
|
||||
EXPECT_TRUE(node->isProfilingCapable());
|
||||
}
|
||||
|
||||
// Compares two allocators that differ only in the seventh constructor argument
// (true vs. false) and checks, via TimeStamps::initializeCount, whether getTag()
// ran initialize() on the returned tag.
// NOTE(review): the 8-argument MockTagAllocator constructor is not visible here;
// presumably it forwards to TagAllocator, where that argument is `initializeTags` - confirm.
TEST_F(TagAllocatorTest, givenReinitializationDisabledWhenGettingNewTagThenDontInitialize) {
|
||||
MockTagAllocator<TimeStamps> tagAllocator1(RootDeviceIndicesContainer{0}, memoryManager, 1, 2, sizeof(TimeStamps), false, true, deviceBitfield);
|
||||
MockTagAllocator<TimeStamps> tagAllocator2(RootDeviceIndicesContainer{0}, memoryManager, 1, 2, sizeof(TimeStamps), false, false, deviceBitfield);
|
||||
|
||||
// Zero the counter on the head free tag of each allocator so the expectations
// below start from a known baseline.
tagAllocator1.freeTags.peekHead()->tagForCpuAccess->initializeCount = 0;
|
||||
tagAllocator2.freeTags.peekHead()->tagForCpuAccess->initializeCount = 0;
|
||||
|
||||
// First acquisition: only the allocator constructed with `true` is expected to
// have called initialize().
auto node1 = static_cast<TagNode<TimeStamps> *>(tagAllocator1.getTag());
|
||||
auto node2 = static_cast<TagNode<TimeStamps> *>(tagAllocator2.getTag());
|
||||
EXPECT_EQ(1u, node1->tagForCpuAccess->initializeCount);
|
||||
EXPECT_EQ(0u, node2->tagForCpuAccess->initializeCount);
|
||||
|
||||
// Hand both tags back, then acquire again to cover the reuse path.
node1->returnTag();
|
||||
node2->returnTag();
|
||||
|
||||
node1 = static_cast<TagNode<TimeStamps> *>(tagAllocator1.getTag());
|
||||
node2 = static_cast<TagNode<TimeStamps> *>(tagAllocator2.getTag());
// A count of 2 means the tag obtained from tagAllocator1 was initialized again on
// re-acquisition; the tag from tagAllocator2 is expected to stay untouched.
EXPECT_EQ(2u, node1->tagForCpuAccess->initializeCount);
|
||||
EXPECT_EQ(0u, node2->tagForCpuAccess->initializeCount);
|
||||
}
|
||||
|
||||
TEST_F(TagAllocatorTest, givenMultipleReferencesOnTagWhenReleasingThenReturnWhenAllRefCountsAreReleased) {
|
||||
MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 2, 1, deviceBitfield);
|
||||
|
||||
@@ -475,7 +499,9 @@ TEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenPopulatingTagsThenCreateMul
|
||||
|
||||
const RootDeviceIndicesContainer indices = {0, 2, maxRootDeviceIndex};
|
||||
|
||||
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(indices, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false, mockDeviceBitfield);
|
||||
MockTagAllocator<TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>> timestampPacketAllocator(indices, testMemoryManager, 1, 1,
|
||||
sizeof(TimestampPackets<uint32_t, TimestampPacketConstants::preferredPacketCount>), false,
|
||||
true, mockDeviceBitfield);
|
||||
|
||||
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());
|
||||
|
||||
@@ -505,7 +531,8 @@ HWTEST_F(TagAllocatorTest, givenMultipleRootDevicesWhenCallingMakeResidentThenUs
|
||||
|
||||
const RootDeviceIndicesContainer indicesVector = {0, 1};
|
||||
|
||||
MockTagAllocator<TimestampPackets<uint32_t, FamilyType::timestampPacketCount>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, FamilyType::timestampPacketCount>), false, mockDeviceBitfield);
|
||||
MockTagAllocator<TimestampPackets<uint32_t, FamilyType::timestampPacketCount>> timestampPacketAllocator(indicesVector, testMemoryManager, 1, 1, sizeof(TimestampPackets<uint32_t, FamilyType::timestampPacketCount>),
|
||||
false, true, mockDeviceBitfield);
|
||||
|
||||
EXPECT_EQ(1u, timestampPacketAllocator.getGraphicsAllocationsCount());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user