From 3758e99cbfb7f9eddab157fe32adae228194c4a8 Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Fri, 19 Jul 2024 15:15:04 +0000 Subject: [PATCH] feature: initial support for standalone CB Events Timestamps allocator Related-To: NEO-11925 Signed-off-by: Bartosz Dunajski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 4 ++ level_zero/core/source/device/device.cpp | 18 +++++ level_zero/core/source/device/device.h | 2 + level_zero/core/source/device/device_imp.cpp | 1 + level_zero/core/source/event/event.cpp | 12 ++++ level_zero/core/source/event/event.h | 5 +- level_zero/core/source/event/event_impl.inl | 4 ++ .../gfx_core_helpers/l0_gfx_core_helper.h | 9 +++ .../l0_gfx_core_helper_base.inl | 16 +++++ .../fixtures/in_order_cmd_list_fixture.h | 1 + .../core/test/unit_tests/mocks/mock_device.h | 1 + .../sources/cmdlist/test_in_order_cmdlist.cpp | 63 ++++++++++++++++++ .../sources/device/test_l0_device.cpp | 10 ++- .../debug_settings/debug_variables_base.inl | 1 + .../source/helpers/in_order_cmd_helpers.cpp | 25 +++++++ shared/source/helpers/in_order_cmd_helpers.h | 8 +++ shared/test/common/test_files/igdrcl.config | 1 + .../command_encoder_tests.cpp | 66 +++++++++++++++++++ 18 files changed, 245 insertions(+), 2 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 35d7210807..fd859d153a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3968,6 +3968,10 @@ bool CommandListCoreFamily::handleCounterBasedEventOperations(Eve this->interruptEvents.push_back(signalEvent); } } + + if (signalEvent->isUsingContextEndOffset() && NEO::debugManager.flags.StandaloneInOrderTimestampAllocationEnabled.get() == 1) { + signalEvent->resetInOrderTimestampNode(device->getInOrderTimestampAllocator()->getTag()); + } } return true; diff --git a/level_zero/core/source/device/device.cpp b/level_zero/core/source/device/device.cpp index 450d46861c..a5526d37b8 100644 --- a/level_zero/core/source/device/device.cpp +++ b/level_zero/core/source/device/device.cpp @@ -9,6 +9,7 @@ #include "shared/source/device/device.h" #include "shared/source/helpers/aligned_memory.h" +#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/in_order_cmd_helpers.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" @@ -53,6 +54,23 @@ NEO::TagAllocatorBase *Device::getHostInOrderCounterAllocator() { return getInOrderCounterAllocator>(hostInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice(), getL0GfxCoreHelper().getImmediateWritePostSyncOffset()); } +NEO::TagAllocatorBase *Device::getInOrderTimestampAllocator() { + if (!inOrderTimestampAllocator.get()) { + std::unique_lock lock(inOrderAllocatorMutex); + + if (!inOrderTimestampAllocator.get()) { + RootDeviceIndicesContainer rootDeviceIndices = {getNEODevice()->getRootDeviceIndex()}; + + size_t packetsCountPerElement = getEventMaxPacketCount(); + size_t alignment = getGfxCoreHelper().getTimestampPacketAllocatorAlignment(); + + inOrderTimestampAllocator = getL0GfxCoreHelper().getInOrderTimestampAllocator(rootDeviceIndices, getNEODevice()->getMemoryManager(), 64, packetsCountPerElement, alignment, getNEODevice()->getDeviceBitfield()); + } + } + + return inOrderTimestampAllocator.get(); +} + uint32_t Device::getNextSyncDispatchQueueId() { auto newValue = syncDispatchQueueIdAllocator.fetch_add(1); diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index d81dbd6da7..2307d89d3e 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -150,6 +150,7 @@ struct Device : _ze_device_handle_t { virtual uint32_t getEventMaxKernelCount() const = 0; NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator(); NEO::TagAllocatorBase *getHostInOrderCounterAllocator(); + NEO::TagAllocatorBase *getInOrderTimestampAllocator(); NEO::GraphicsAllocation *getSyncDispatchTokenAllocation() const { return syncDispatchTokenAllocation; } uint32_t getNextSyncDispatchQueueId(); void ensureSyncDispatchTokenAllocation(); @@ -158,6 +159,7 @@ struct Device : _ze_device_handle_t { NEO::Device *neoDevice = nullptr; std::unique_ptr deviceInOrderCounterAllocator; std::unique_ptr hostInOrderCounterAllocator; + std::unique_ptr inOrderTimestampAllocator; NEO::GraphicsAllocation *syncDispatchTokenAllocation = nullptr; std::mutex inOrderAllocatorMutex; std::mutex syncDispatchTokenMutex; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 0c48688afc..70cffd4ad1 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1501,6 +1501,7 @@ void DeviceImp::releaseResources() { cacheReservation.reset(); deviceInOrderCounterAllocator.reset(); hostInOrderCounterAllocator.reset(); + inOrderTimestampAllocator.reset(); if (allocationsForReuse.get()) { allocationsForReuse->freeAllGraphicsAllocations(neoDevice); diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index c440c5d35c..e2dbbebcc6 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -41,6 +41,10 @@ template Event *Event::create(EventPool *, const ze_event_desc_t *, De template Event *Event::create(const EventDescriptor &, const ze_event_desc_t *, Device *); template Event *Event::create(const EventDescriptor &, const ze_event_desc_t *, Device *); +Event::~Event() { + resetInOrderTimestampNode(nullptr); +} + ze_result_t EventPool::initialize(DriverHandle *driver, Context *context, uint32_t numDevices, ze_device_handle_t *deviceHandles) { this->context = static_cast(context); @@ -494,9 +498,17 @@ void Event::setReferenceTs(uint64_t currentCpuTimeStamp) { } void Event::unsetInOrderExecInfo() { + resetInOrderTimestampNode(nullptr); inOrderExecInfo.reset(); inOrderAllocationOffset = 0; inOrderExecSignalValue = 0; } +void Event::resetInOrderTimestampNode(NEO::TagNodeBase *newNode) { + if (inOrderTimestampNode) { + inOrderExecInfo->pushTempTimestampNode(inOrderTimestampNode, inOrderExecSignalValue); + } + inOrderTimestampNode = newNode; +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 028efd2937..c80c65444e 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -78,7 +78,7 @@ struct EventDescriptor { }; struct Event : _ze_event_handle_t { - virtual ~Event() = default; + virtual ~Event(); virtual ze_result_t destroy(); virtual ze_result_t hostSignal(bool allowCounterBased) = 0; virtual ze_result_t hostSynchronize(uint64_t timeout) = 0; @@ -292,6 +292,8 @@ struct Event : _ze_event_handle_t { void setExternalInterruptId(uint32_t interruptId) { externalInterruptId = interruptId; } + void resetInOrderTimestampNode(NEO::TagNodeBase *newNode); + protected: Event(int index, Device *device) : device(device), index(index) {} @@ -333,6 +335,7 @@ struct Event : _ze_event_handle_t { std::mutex *kernelWithPrintfDeviceMutex = nullptr; std::shared_ptr inOrderExecInfo; CommandQueue *latestUsedCmdQueue = nullptr; + NEO::TagNodeBase *inOrderTimestampNode = nullptr; uint32_t maxKernelCount = 0; uint32_t kernelCount = 1u; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 65d822b268..c093fb802a 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -268,6 +268,10 @@ void EventImp::handleSuccessfulHostSynchronization() { for (auto &csr : csrs) { csr->getInternalAllocationStorage()->cleanAllocationList(csr->peekTaskCount(), NEO::AllocationUsage::TEMPORARY_ALLOCATION); } + + if (inOrderExecInfo) { + inOrderExecInfo->releaseNotUsedTempTimestampNodes(false); + } } template diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h index 5df5e9167c..98d5016a75 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/helpers/api_gfx_core_helper.h" +#include "shared/source/helpers/device_bitfield.h" #include "shared/source/helpers/heap_base_address_model.h" #include "level_zero/include/zet_intel_gpu_debug.h" @@ -28,8 +29,12 @@ struct EngineGroupT; struct RootDeviceEnvironment; class Debugger; class ProductHelper; +class TagAllocatorBase; +class MemoryManager; } // namespace NEO +class RootDeviceIndicesContainer; + namespace L0 { enum class RTASDeviceFormatInternal { @@ -109,6 +114,8 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { virtual uint32_t getIpSamplingMetricCount() = 0; virtual bool synchronizedDispatchSupported() const = 0; virtual bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const = 0; + virtual std::unique_ptr getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment, + NEO::DeviceBitfield deviceBitfield) const = 0; protected: L0GfxCoreHelper() = default; @@ -158,6 +165,8 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper { uint32_t getIpSamplingMetricCount() override; bool synchronizedDispatchSupported() const override; bool implicitSynchronizedDispatchForCooperativeKernelsAllowed() const override; + std::unique_ptr getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment, + NEO::DeviceBitfield deviceBitfield) const override; protected: L0GfxCoreHelperHw() = default; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl index ffe07062b9..0a7a18d319 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl @@ -8,6 +8,10 @@ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/helpers/gfx_core_helper.h" +#include "shared/source/helpers/timestamp_packet.h" +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/utilities/stackvec.h" +#include "shared/source/utilities/tag_allocator.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/event/event.h" @@ -68,4 +72,16 @@ bool L0GfxCoreHelperHw::implicitSynchronizedDispatchForCooperativeKernel return false; } +template +std::unique_ptr L0GfxCoreHelperHw::getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, + size_t tagAlignment, NEO::DeviceBitfield deviceBitfield) const { + + using TimestampPacketType = typename Family::TimestampPacketType; + using TimestampPacketsT = NEO::TimestampPackets; + + size_t size = sizeof(TimestampPacketsT) * packetsCountPerElement; + + return std::make_unique>(rootDeviceIndices, memoryManager, initialTagCount, tagAlignment, size, false, deviceBitfield); +} + } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h index 99b60b5c32..27887e1d39 100644 --- a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h @@ -34,6 +34,7 @@ struct InOrderCmdListFixture : public ::Test { using EventImp::unsetCmdQueue; using EventImp::externalInterruptId; using EventImp::latestUsedCmdQueue; + using EventImp::inOrderTimestampNode; void makeCounterBasedInitiallyDisabled() { counterBasedMode = CounterBasedMode::initiallyDisabled; diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index 47bc1f39a8..279f8d0758 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -106,6 +106,7 @@ struct MockDeviceImp : public L0::DeviceImp { using Base::getNEODevice; using Base::hostInOrderCounterAllocator; using Base::implicitScalingCapable; + using Base::inOrderTimestampAllocator; using Base::neoDevice; using Base::subDeviceCopyEngineGroups; using Base::syncDispatchTokenAllocation; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index f83f4fd308..1e90457cbe 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -6184,6 +6184,69 @@ HWTEST2_F(InOrderRegularCmdListTests, givenAddedCmdForPatchWhenUpdateNewInOrderI EXPECT_EQ(5u, semaphoreCmd.getSemaphoreDataDword()); } +struct StandaloneInOrderTimestampAllocationTests : public InOrderCmdListTests { + void SetUp() override { + NEO::debugManager.flags.StandaloneInOrderTimestampAllocationEnabled.set(1); + InOrderCmdListTests::SetUp(); + } +}; + +HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenTimestampEventWhenDispatchingThenAssignNewNode, IsAtLeastSkl) { + auto eventPool = createEvents(1, true); + auto eventHandle = events[0]->toHandle(); + + auto cmdList = createImmCmdList(); + + EXPECT_EQ(nullptr, events[0]->inOrderTimestampNode); + + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + EXPECT_NE(nullptr, events[0]->inOrderTimestampNode); + + // keep node0 ownership for testing + auto node0 = events[0]->inOrderTimestampNode; + events[0]->inOrderTimestampNode = nullptr; + + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + EXPECT_NE(nullptr, events[0]->inOrderTimestampNode); + EXPECT_NE(node0, events[0]->inOrderTimestampNode); + + auto node1 = events[0]->inOrderTimestampNode; + + // node1 moved to reusable list + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + EXPECT_NE(nullptr, events[0]->inOrderTimestampNode); + EXPECT_NE(node1->getGpuAddress(), events[0]->inOrderTimestampNode->getGpuAddress()); + + auto node2 = events[0]->inOrderTimestampNode; + + auto hostAddress = cmdList->inOrderExecInfo->getBaseHostAddress(); + *hostAddress = 3; + + // return node1 to pool + EXPECT_EQ(ZE_RESULT_SUCCESS, events[0]->hostSynchronize(1)); + + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + // node1 reused + EXPECT_EQ(node1->getGpuAddress(), events[0]->inOrderTimestampNode->getGpuAddress()); + + // reuse node2 - counter already waited + *hostAddress = 2; + + cmdList->inOrderExecInfo->releaseNotUsedTempTimestampNodes(false); + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + EXPECT_EQ(node2->getGpuAddress(), events[0]->inOrderTimestampNode->getGpuAddress()); + + events[0]->unsetInOrderExecInfo(); + EXPECT_EQ(nullptr, events[0]->inOrderTimestampNode); + + cmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false); + + // mark as not ready, to make sure that destructor will release everything anyway + *hostAddress = 0; +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeAndNoopWaitEventsAllowedWhenEventBoundToCmdListThenNoopSpaceForWaitCommands, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index e49711bbd9..f4821784c9 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -889,7 +889,12 @@ TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAn MockHostTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} }; - class MyMockDevice : public DestructorCounted { + class MockTsAllocator : public DestructorCounted>, 2> { + public: + MockTsAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} + }; + + class MyMockDevice : public DestructorCounted { public: MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {} }; @@ -905,13 +910,16 @@ TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAn { auto deviceAllocator = new MockDeviceTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); auto hostAllocator = new MockHostTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); + auto tsAllocator = new MockTsAllocator(0, neoMockDevice->getMemoryManager(), destructorId); MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); deviceImp.deviceInOrderCounterAllocator.reset(deviceAllocator); deviceImp.hostInOrderCounterAllocator.reset(hostAllocator); + deviceImp.inOrderTimestampAllocator.reset(tsAllocator); EXPECT_EQ(deviceAllocator, deviceImp.getDeviceInOrderCounterAllocator()); EXPECT_EQ(hostAllocator, deviceImp.getHostInOrderCounterAllocator()); + EXPECT_EQ(tsAllocator, deviceImp.getInOrderTimestampAllocator()); } } diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 1743740f8e..78d330d049 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -290,6 +290,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceSynchronizedDispatchMode, -1, "-1: default, DECLARE_DEBUG_VARIABLE(int32_t, ForceSipClass, -1, "-1: default, otherwise based on values from enum class SipClassType (init, builtins, rawBinaryFromFile, hexadecimalHeaderFile)") DECLARE_DEBUG_VARIABLE(int32_t, ForceScratchAndMTPBufferSizeMode, -1, "-1: default, 0: Full, 1: Min. BMG+: Reduce required memory for Scrach and MTP buffers on CCS context") DECLARE_DEBUG_VARIABLE(int32_t, CFEStackIDControl, -1, "Set Stack ID Control in CFE_STATE on Xe2+, -1 - do not set") +DECLARE_DEBUG_VARIABLE(int32_t, StandaloneInOrderTimestampAllocationEnabled, -1, "-1: default, 0: disabled, 1: enabled. If enabled, use internal allocations, instead of Event pool for timestamps") /*LOGGING FLAGS*/ DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level") diff --git a/shared/source/helpers/in_order_cmd_helpers.cpp b/shared/source/helpers/in_order_cmd_helpers.cpp index 75d9812be7..4ac96c3465 100644 --- a/shared/source/helpers/in_order_cmd_helpers.cpp +++ b/shared/source/helpers/in_order_cmd_helpers.cpp @@ -46,6 +46,9 @@ InOrderExecInfo::~InOrderExecInfo() { if (hostCounterNode) { hostCounterNode->returnTag(); } + + // forced return - All related objects (CmdList and Events) already destroyed + releaseNotUsedTempTimestampNodes(true); } InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, @@ -102,4 +105,26 @@ uint64_t InOrderExecInfo::getBaseHostGpuAddress() const { return hostCounterNode->getGpuAddress(); } +void InOrderExecInfo::pushTempTimestampNode(TagNodeBase *node, uint64_t value) { + std::unique_lock lock(mutex); + + tempTimestampNodes.emplace_back(node, value); +} + +void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) { + std::unique_lock lock(mutex); + + std::vector> tempVector; + + for (auto &node : tempTimestampNodes) { + if (forceReturn || lastWaitedCounterValue >= node.second) { + node.first->returnTag(); + } else { + tempVector.push_back(node); + } + } + + tempTimestampNodes.swap(tempVector); +} + } // namespace NEO diff --git a/shared/source/helpers/in_order_cmd_helpers.h b/shared/source/helpers/in_order_cmd_helpers.h index 074e3640e1..6f5994c3e7 100644 --- a/shared/source/helpers/in_order_cmd_helpers.h +++ b/shared/source/helpers/in_order_cmd_helpers.h @@ -14,6 +14,7 @@ #include #include +#include #include namespace NEO { @@ -90,11 +91,18 @@ class InOrderExecInfo : public NEO::NonCopyableClass { NEO::GraphicsAllocation *getExternalHostAllocation() const { return externalHostAllocation; } + void pushTempTimestampNode(TagNodeBase *node, uint64_t value); + void releaseNotUsedTempTimestampNodes(bool forceReturn); + protected: NEO::MemoryManager &memoryManager; NEO::TagNodeBase *deviceCounterNode = nullptr; NEO::TagNodeBase *hostCounterNode = nullptr; NEO::GraphicsAllocation *externalHostAllocation = nullptr; + std::vector> tempTimestampNodes; + + std::mutex mutex; + uint64_t counterValue = 0; uint64_t lastWaitedCounterValue = 0; diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index 9551b01772..b08ae0e5a4 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -620,4 +620,5 @@ ForceScratchAndMTPBufferSizeMode = -1 ForcePostSyncL1Flush = -1 AllowNotZeroForCompressedOnWddm = -1 ForceGmmSystemMemoryBufferForAllocations = 0 +StandaloneInOrderTimestampAllocationEnabled = -1 # Please don't edit below this line diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index 139c46cd5a..76f5c62f72 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -74,6 +74,72 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO EXPECT_EQ(0u, inOrderExecInfo->getCounterValue()); } +HWTEST_F(CommandEncoderTests, givenTsNodesWhenStoringOnTempListThenHandleOwnershipCorrectly) { + class MyMockInOrderExecInfo : public NEO::InOrderExecInfo { + public: + using InOrderExecInfo::InOrderExecInfo; + using InOrderExecInfo::lastWaitedCounterValue; + using InOrderExecInfo::tempTimestampNodes; + }; + + MockDevice mockDevice; + + using AllocatorT = MockTagAllocator>; + + AllocatorT tsAllocator(0, mockDevice.getMemoryManager()); + + auto &memoryManager = *mockDevice.getMemoryManager(); + auto node0 = static_cast(tsAllocator.getTag()); + auto node1 = static_cast(tsAllocator.getTag()); + + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1)); + + { + MyMockInOrderExecInfo inOrderExecInfo(nullptr, nullptr, memoryManager, 1, 0, false, false); + + inOrderExecInfo.lastWaitedCounterValue = 0; + + inOrderExecInfo.pushTempTimestampNode(node0, 1); + inOrderExecInfo.pushTempTimestampNode(node1, 2); + + EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size()); + + inOrderExecInfo.releaseNotUsedTempTimestampNodes(false); + EXPECT_EQ(2u, inOrderExecInfo.tempTimestampNodes.size()); + + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1)); + + inOrderExecInfo.lastWaitedCounterValue = 1; + inOrderExecInfo.releaseNotUsedTempTimestampNodes(false); + EXPECT_EQ(1u, inOrderExecInfo.tempTimestampNodes.size()); + EXPECT_EQ(node1, inOrderExecInfo.tempTimestampNodes[0].first); + + EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1)); + + inOrderExecInfo.lastWaitedCounterValue = 2; + inOrderExecInfo.releaseNotUsedTempTimestampNodes(false); + EXPECT_EQ(0u, inOrderExecInfo.tempTimestampNodes.size()); + EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1)); + + node0 = static_cast(tsAllocator.getTag()); + node1 = static_cast(tsAllocator.getTag()); + + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_FALSE(tsAllocator.freeTags.peekContains(*node1)); + + inOrderExecInfo.pushTempTimestampNode(node0, 3); + inOrderExecInfo.pushTempTimestampNode(node1, 4); + } + + // forced release on destruction + EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node0)); + EXPECT_TRUE(tsAllocator.freeTags.peekContains(*node1)); +} + HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) { MockDevice mockDevice;