feature: initial support for standalone CB Events Timestamps allocator

Related-To: NEO-11925

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-07-19 15:15:04 +00:00
committed by Compute-Runtime-Automation
parent 4513e42ddb
commit 3758e99cbf
18 changed files with 245 additions and 2 deletions

View File

@@ -290,6 +290,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSynchronizedDispatchMode, -1, "-1: default,
DECLARE_DEBUG_VARIABLE(int32_t, ForceSipClass, -1, "-1: default, otherwise based on values from enum class SipClassType (init, builtins, rawBinaryFromFile, hexadecimalHeaderFile)")
DECLARE_DEBUG_VARIABLE(int32_t, ForceScratchAndMTPBufferSizeMode, -1, "-1: default, 0: Full, 1: Min. BMG+: Reduce required memory for Scrach and MTP buffers on CCS context")
DECLARE_DEBUG_VARIABLE(int32_t, CFEStackIDControl, -1, "Set Stack ID Control in CFE_STATE on Xe2+, -1 - do not set")
DECLARE_DEBUG_VARIABLE(int32_t, StandaloneInOrderTimestampAllocationEnabled, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use internal allocations, instead of Event pool for timestamps")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@@ -46,6 +46,9 @@ InOrderExecInfo::~InOrderExecInfo() {
if (hostCounterNode) {
hostCounterNode->returnTag();
}
// forced return - All related objects (CmdList and Events) already destroyed
releaseNotUsedTempTimestampNodes(true);
}
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
@@ -102,4 +105,26 @@ uint64_t InOrderExecInfo::getBaseHostGpuAddress() const {
return hostCounterNode->getGpuAddress();
}
// Appends a (node, value) entry to the list of temporary timestamp nodes.
// The entry stays on the list until releaseNotUsedTempTimestampNodes() hands the node back.
// The list is modified under the object's mutex.
void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value) {
    const std::lock_guard<std::mutex> guard(mutex);

    tempTimestampNodes.push_back({node, value});
}
// Calls returnTag() on every stored temporary timestamp node that is no longer needed
// and drops it from the list; the remaining entries keep their relative order.
//
// forceReturn - when true, every stored node is returned regardless of its value
//               (the destructor uses this mode). When false, only nodes whose stored
//               value is <= lastWaitedCounterValue are returned.
//
// The whole operation runs under the object's mutex.
void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) {
    std::unique_lock<std::mutex> lock(mutex);

    // Compact the list in place instead of building a second vector on every call:
    // entries that must survive are shifted towards the front, then the tail is erased.
    auto keepIt = tempTimestampNodes.begin();
    for (auto &node : tempTimestampNodes) {
        if (forceReturn || lastWaitedCounterValue >= node.second) {
            node.first->returnTag();
        } else {
            // keepIt never runs ahead of the element being visited, so this only
            // overwrites slots that were already processed (or is a self-assignment).
            *keepIt = node;
            ++keepIt;
        }
    }
    tempTimestampNodes.erase(keepIt, tempTimestampNodes.end());
}
} // namespace NEO

View File

@@ -14,6 +14,7 @@
#include <cstdint>
#include <memory>
#include <mutex>
#include <vector>
namespace NEO {
@@ -90,11 +91,18 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
NEO::GraphicsAllocation *getExternalHostAllocation() const { return externalHostAllocation; }
// Stores node together with value on the internal list of temporary timestamp nodes.
void pushTempTimestampNode(TagNodeBase *node, uint64_t value);
// Calls returnTag() on stored nodes and removes them from the internal list.
// With forceReturn == true all stored nodes are returned; otherwise only those
// whose stored value is <= lastWaitedCounterValue.
void releaseNotUsedTempTimestampNodes(bool forceReturn);
protected:
NEO::MemoryManager &memoryManager;
NEO::TagNodeBase *deviceCounterNode = nullptr;
NEO::TagNodeBase *hostCounterNode = nullptr;
NEO::GraphicsAllocation *externalHostAllocation = nullptr;
// Temporary timestamp nodes paired with the value passed to pushTempTimestampNode();
// the value is compared against lastWaitedCounterValue when deciding whether to return the node.
std::vector<std::pair<NEO::TagNodeBase *, uint64_t>> tempTimestampNodes;
// Locked by pushTempTimestampNode() and releaseNotUsedTempTimestampNodes() while they access tempTimestampNodes.
std::mutex mutex;
uint64_t counterValue = 0;
uint64_t lastWaitedCounterValue = 0;

View File

@@ -620,4 +620,5 @@ ForceScratchAndMTPBufferSizeMode = -1
ForcePostSyncL1Flush = -1
AllowNotZeroForCompressedOnWddm = -1
ForceGmmSystemMemoryBufferForAllocations = 0
StandaloneInOrderTimestampAllocationEnabled = -1
# Please don't edit below this line

View File

@@ -74,6 +74,72 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO
EXPECT_EQ(0u, inOrderExecInfo->getCounterValue());
}
// Verifies that timestamp nodes pushed to InOrderExecInfo's temporary list are handed back
// to their allocator only once lastWaitedCounterValue reaches the value they were pushed with,
// and that any nodes still on the list are handed back when the object is destroyed.
HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnershipCorrectly) {
// Test-local subclass that exposes the protected members the test needs to drive and inspect.
class MyMockInOrderExecInfo : public NEO::InOrderExecInfo {
public:
using InOrderExecInfo::InOrderExecInfo;
using InOrderExecInfo::lastWaitedCounterValue;
using InOrderExecInfo::tempTimestampNodes;
};
MockDevice mockDevice;
using AllocatorT = MockTagAllocator<NEO::TimestampPackets<uint64_t, 1>>;
AllocatorT tsAllocator(0, mockDevice.getMemoryManager());
auto &memoryManager = *mockDevice.getMemoryManager();
// Freshly obtained tags must not be on the allocator's free list.
auto node0 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
auto node1 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
{
MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, memoryManager, 1, 0, false, false);
inOrderExecInfo.lastWaitedCounterValue = 0;
inOrderExecInfo.pushTempTimestampNode(node0, 1);
inOrderExecInfo.pushTempTimestampNode(node1, 2);
EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size());
// Waited value (0) is below both stored values (1 and 2): nothing may be released.
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size());
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
// Waited value reaches 1: only node0 is released, node1 stays on the list.
inOrderExecInfo.lastWaitedCounterValue = 1;
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
EXPECT_EQ(1u, inOrderExecInfo.tempTimestampNodes.size());
EXPECT_EQ(node1, inOrderExecInfo.tempTimestampNodes[0].first);
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
// Waited value reaches 2: node1 is released as well and the list becomes empty.
inOrderExecInfo.lastWaitedCounterValue = 2;
inOrderExecInfo.releaseNotUsedTempTimestampNodes(false);
EXPECT_EQ(0u, inOrderExecInfo.tempTimestampNodes.size());
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1));
// Take two tags again and leave them on the list (values 3 and 4 exceed the waited value)
// so that only the destructor at the end of this scope can hand them back.
node0 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
node1 = static_cast<AllocatorT::NodeType *>(tsAllocator.getTag());
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1));
inOrderExecInfo.pushTempTimestampNode(node0, 3);
inOrderExecInfo.pushTempTimestampNode(node1, 4);
}
// forced release on destruction
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0));
EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1));
}
HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) {
MockDevice mockDevice;