From e400a5c62469fed6abb06fa3ac0d1439960e4f5a Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Wed, 13 Mar 2024 15:48:10 +0000 Subject: [PATCH] feature: in-order host counter allocation pooling 2 Related-To: NEO-10507 Signed-off-by: Bartosz Dunajski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 2 +- .../core/source/cmdlist/cmdlist_imp.cpp | 9 +++- level_zero/core/source/device/device.cpp | 26 +++++++---- level_zero/core/source/device/device.h | 2 + level_zero/core/source/device/device_imp.cpp | 1 + .../core/test/unit_tests/mocks/mock_device.h | 1 + .../sources/cmdlist/test_in_order_cmdlist.cpp | 43 +++++++++++++++++-- .../sources/device/test_l0_device.cpp | 23 +++++++--- .../unit_tests/sources/event/test_event.cpp | 15 +++---- .../source/helpers/in_order_cmd_helpers.cpp | 40 ++++++++--------- shared/source/helpers/in_order_cmd_helpers.h | 9 ++-- .../command_encoder_tests.cpp | 34 ++++++++------- 12 files changed, 137 insertions(+), 68 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 3204a0b800..b752053909 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2629,7 +2629,7 @@ void CommandListCoreFamily::appendSignalInOrderDependencyCounter( } if (inOrderExecInfo->isHostStorageDuplicated()) { - appendSdiInOrderCounterSignalling(inOrderExecInfo->getHostCounterAllocation()->getGpuAddress(), signalValue); + appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue); } if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1) && signalEvent && signalEvent->isInterruptModeEnabled()) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 09b7aaf03c..9659a8752a 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -259,8 +259,15 @@ void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderExecInfo.get()); auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag(); + NEO::TagNodeBase *hostCounterNode = nullptr; - inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType()); + auto &gfxCoreHelper = device->getGfxCoreHelper(); + + if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device->getNEODevice()->getRootDeviceEnvironment())) { + hostCounterNode = this->device->getHostInOrderCounterAllocator()->getTag(); + } + + inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, hostCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType()); } void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { diff --git a/level_zero/core/source/device/device.cpp b/level_zero/core/source/device/device.cpp index 1fd8d95b05..eda06a9e20 100644 --- a/level_zero/core/source/device/device.cpp +++ b/level_zero/core/source/device/device.cpp @@ -21,26 +21,34 @@ NEO::DebuggerL0 *Device::getL0Debugger() { return getNEODevice()->getL0Debugger(); } -NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() { - if (!deviceInOrderCounterAllocator.get()) { +template +NEO::TagAllocatorBase *getInOrderCounterAllocator(std::unique_ptr &allocator, std::mutex &inOrderAllocatorMutex, NEO::Device &neoDevice) { + if (!allocator.get()) { std::unique_lock lock(inOrderAllocatorMutex); - if (!deviceInOrderCounterAllocator.get()) { - using NodeT = typename NEO::DeviceAllocNodeType; - RootDeviceIndicesContainer rootDeviceIndices = {getRootDeviceIndex()}; + if (!allocator.get()) { + RootDeviceIndicesContainer rootDeviceIndices = {neoDevice.getRootDeviceIndex()}; - const size_t maxPartitionCount = getNEODevice()->getDeviceBitfield().count(); + const size_t maxPartitionCount = neoDevice.getDeviceBitfield().count(); const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // Multiplied by 2 to handle 32b overflow DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k); - deviceInOrderCounterAllocator = std::make_unique>(rootDeviceIndices, neoDevice->getMemoryManager(), NodeT::defaultAllocatorTagCount, - MemoryConstants::cacheLineSize, nodeSize, false, neoDevice->getDeviceBitfield()); + allocator = std::make_unique>(rootDeviceIndices, neoDevice.getMemoryManager(), NodeT::defaultAllocatorTagCount, + MemoryConstants::cacheLineSize, nodeSize, false, neoDevice.getDeviceBitfield()); } } - return deviceInOrderCounterAllocator.get(); + return allocator.get(); +} + +NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() { + return getInOrderCounterAllocator>(deviceInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice()); +} + +NEO::TagAllocatorBase *Device::getHostInOrderCounterAllocator() { + return getInOrderCounterAllocator>(hostInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice()); } } // namespace L0 \ No newline at end of file diff --git a/level_zero/core/source/device/device.h b/level_zero/core/source/device/device.h index 3fb94db846..5705ec8a21 100644 --- a/level_zero/core/source/device/device.h +++ b/level_zero/core/source/device/device.h @@ -150,10 +150,12 @@ struct Device : _ze_device_handle_t { virtual uint32_t getEventMaxPacketCount() const = 0; virtual uint32_t getEventMaxKernelCount() const = 0; NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator(); + NEO::TagAllocatorBase *getHostInOrderCounterAllocator(); protected: NEO::Device *neoDevice = nullptr; std::unique_ptr deviceInOrderCounterAllocator; + std::unique_ptr hostInOrderCounterAllocator; std::mutex inOrderAllocatorMutex; bool implicitScalingCapable = false; }; diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 0fd7e99926..02ed3ff273 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1467,6 +1467,7 @@ void DeviceImp::releaseResources() { builtins.reset(); cacheReservation.reset(); deviceInOrderCounterAllocator.reset(); + hostInOrderCounterAllocator.reset(); if (allocationsForReuse.get()) { allocationsForReuse->freeAllGraphicsAllocations(neoDevice); diff --git a/level_zero/core/test/unit_tests/mocks/mock_device.h b/level_zero/core/test/unit_tests/mocks/mock_device.h index 1d71f28b2d..1dde11e587 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_device.h +++ b/level_zero/core/test/unit_tests/mocks/mock_device.h @@ -105,6 +105,7 @@ struct MockDeviceImp : public L0::DeviceImp { using Base::debugSession; using Base::deviceInOrderCounterAllocator; using Base::getNEODevice; + using Base::hostInOrderCounterAllocator; using Base::implicitScalingCapable; using Base::neoDevice; using Base::subDeviceCopyEngineGroups; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp index 0e45282f85..6343b11d81 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist.cpp @@ -396,6 +396,39 @@ HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseDeviceAlloc, tag->returnTag(); } +HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseHostAlloc, IsAtLeastSkl) { + debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); + + auto tag = device->getHostInOrderCounterAllocator()->getTag(); + + auto immCmdList1 = createImmCmdList(); + auto gpuVa1 = immCmdList1->inOrderExecInfo->getBaseHostAddress(); + + auto immCmdList2 = createImmCmdList(); + auto gpuVa2 = immCmdList2->inOrderExecInfo->getBaseHostAddress(); + + EXPECT_NE(gpuVa1, gpuVa2); + + // allocation from the same allocator + EXPECT_EQ(immCmdList1->inOrderExecInfo->getHostCounterAllocation(), tag->getBaseGraphicsAllocation()->getGraphicsAllocation(0)); + + immCmdList1.reset(); + + auto immCmdList3 = createImmCmdList(); + auto gpuVa3 = immCmdList3->inOrderExecInfo->getBaseHostAddress(); + + EXPECT_EQ(gpuVa1, gpuVa3); + + immCmdList2.reset(); + + auto immCmdList4 = createImmCmdList(); + auto gpuVa4 = immCmdList4->inOrderExecInfo->getBaseHostAddress(); + + EXPECT_EQ(gpuVa2, gpuVa4); + + tag->returnTag(); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenReturnError, IsAtLeastSkl) { auto immCmdList = createImmCmdList(); @@ -1573,8 +1606,13 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll EXPECT_EQ(immCmdList1->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer()); EXPECT_FALSE(immCmdList1->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool()); + EXPECT_EQ(immCmdList1->inOrderExecInfo->getHostCounterAllocation(), immCmdList2->inOrderExecInfo->getHostCounterAllocation()); + + auto hostAllocOffset = ptrDiff(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getBaseHostAddress()); + EXPECT_NE(0u, hostAllocOffset); + EXPECT_EQ(AllocationType::bufferHostMemory, immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getAllocationType()); - EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer()); + EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), ptrOffset(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), hostAllocOffset)); EXPECT_FALSE(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool()); immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false); @@ -1583,8 +1621,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false); - // host allocation not used as Device dependency - EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]); + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]); } HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEventCsrList, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index dab1e03955..80d5246574 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -837,14 +837,19 @@ TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelAndHostPointerC TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAndHandleDestruction) { uint32_t destructorId = 0u; - class MyMockDevice : public DestructorCounted { + class MockDeviceTagAllocator : public DestructorCounted>, 0> { public: - MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {} + MockDeviceTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} }; - class MyMockTagAllocator : public DestructorCounted>, 0> { + class MockHostTagAllocator : public DestructorCounted>, 1> { public: - MyMockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} + MockHostTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} + }; + + class MyMockDevice : public DestructorCounted { + public: + MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {} }; const uint32_t rootDeviceIndex = 0u; @@ -856,11 +861,15 @@ TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAn neoMockDevice->createDeviceImpl(); { - auto allocator = new MyMockTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); + auto deviceAllocator = new MockDeviceTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); + auto hostAllocator = new MockHostTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); - deviceImp.deviceInOrderCounterAllocator.reset(allocator); - EXPECT_EQ(allocator, deviceImp.getDeviceInOrderCounterAllocator()); + deviceImp.deviceInOrderCounterAllocator.reset(deviceAllocator); + deviceImp.hostInOrderCounterAllocator.reset(hostAllocator); + + EXPECT_EQ(deviceAllocator, deviceImp.getDeviceInOrderCounterAllocator()); + EXPECT_EQ(hostAllocator, deviceImp.getHostInOrderCounterAllocator()); } } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index efa198e18a..f0dab3ce83 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -3425,7 +3425,8 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface = std::make_unique(); - MockTagAllocator> tagAllocator(0, neoDevice->getMemoryManager()); + MockTagAllocator> deviceTagAllocator(0, neoDevice->getMemoryManager()); + MockTagAllocator> hostTagAllocator(0, neoDevice->getMemoryManager()); auto event = zeUniquePtr(whiteboxCast(getHelper().createEvent(eventPool.get(), &eventDesc, device))); @@ -3440,14 +3441,12 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh downloadAllocationTrack[&gfxAllocation]++; }; - uint64_t storage2[2] = {1, 1}; + auto deviceMockNode = deviceTagAllocator.getTag(); + auto hostMockNode = hostTagAllocator.getTag(); + auto deviceSyncAllocation = deviceMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); + auto hostSyncAllocation = hostMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); - auto hostSyncAllocation = new NEO::MockGraphicsAllocation(&storage2, sizeof(storage2)); - - auto mockNode = tagAllocator.getTag(); - auto deviceSyncAllocation = mockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation(); - - auto inOrderExecInfo = std::make_shared(mockNode, hostSyncAllocation, *neoDevice->getMemoryManager(), 1, 0, false, false); + auto inOrderExecInfo = std::make_shared(deviceMockNode, hostMockNode, *neoDevice->getMemoryManager(), 1, 0, false, false); *inOrderExecInfo->getBaseHostAddress() = 1; event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE); diff --git a/shared/source/helpers/in_order_cmd_helpers.cpp b/shared/source/helpers/in_order_cmd_helpers.cpp index 0e605b48b5..ec627f8b16 100644 --- a/shared/source/helpers/in_order_cmd_helpers.cpp +++ b/shared/source/helpers/in_order_cmd_helpers.cpp @@ -19,22 +19,12 @@ namespace NEO { -std::shared_ptr InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) { - NEO::GraphicsAllocation *hostCounterAllocation = nullptr; - - auto &gfxCoreHelper = device.getGfxCoreHelper(); - bool atomicDeviceSignalling = gfxCoreHelper.inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment()); - - if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device.getRootDeviceEnvironment())) { - NEO::AllocationProperties hostAllocationProperties{device.getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::bufferHostMemory, device.getDeviceBitfield()}; - hostCounterAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(hostAllocationProperties); - - UNRECOVERABLE_IF(!hostCounterAllocation); - } +std::shared_ptr InOrderExecInfo::create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) { + bool atomicDeviceSignalling = device.getGfxCoreHelper().inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment()); UNRECOVERABLE_IF(!deviceCounterNode); - return std::make_shared(deviceCounterNode, hostCounterAllocation, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling); + return std::make_shared(deviceCounterNode, hostCounterNode, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling); } std::shared_ptr InOrderExecInfo::createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue) { @@ -52,19 +42,21 @@ InOrderExecInfo::~InOrderExecInfo() { if (deviceCounterNode) { deviceCounterNode->returnTag(); } - memoryManager.freeGraphicsMemory(hostCounterAllocation); + if (hostCounterNode) { + hostCounterNode->returnTag(); + } } -InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, +InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, bool regularCmdList, bool atomicDeviceSignalling) - : memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterAllocation(hostCounterAllocation), rootDeviceIndex(rootDeviceIndex), + : memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterNode(hostCounterNode), rootDeviceIndex(rootDeviceIndex), regularCmdList(regularCmdList), atomicDeviceSignalling(atomicDeviceSignalling) { numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount; numHostPartitionsToWait = partitionCount; - if (hostCounterAllocation) { - hostAddress = reinterpret_cast(hostCounterAllocation->getUnderlyingBuffer()); + if (hostCounterNode) { + hostAddress = reinterpret_cast(hostCounterNode->getCpuBase()); duplicatedHostStorage = true; } else if (deviceCounterNode) { hostAddress = reinterpret_cast(deviceCounterNode->getCpuBase()); @@ -83,9 +75,9 @@ void InOrderExecInfo::initializeAllocationsFromHost() { memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize); } - if (hostCounterAllocation) { + if (hostCounterNode) { const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait; - memset(ptrOffset(hostCounterAllocation->getUnderlyingBuffer(), allocationOffset), 0, hostAllocationWriteSize); + memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize); } } @@ -101,4 +93,12 @@ NEO::GraphicsAllocation *InOrderExecInfo::getDeviceCounterAllocation() const { return deviceCounterNode ? deviceCounterNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex) : nullptr; } +NEO::GraphicsAllocation *InOrderExecInfo::getHostCounterAllocation() const { + return hostCounterNode ? hostCounterNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex) : nullptr; +} + +uint64_t InOrderExecInfo::getBaseHostGpuAddress() const { + return hostCounterNode->getGpuAddress(); +} + } // namespace NEO diff --git a/shared/source/helpers/in_order_cmd_helpers.h b/shared/source/helpers/in_order_cmd_helpers.h index a3e79543d3..ef6e863c46 100644 --- a/shared/source/helpers/in_order_cmd_helpers.h +++ b/shared/source/helpers/in_order_cmd_helpers.h @@ -48,16 +48,17 @@ class InOrderExecInfo : public NEO::NonCopyableClass { InOrderExecInfo() = delete; - static std::shared_ptr create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList); + static std::shared_ptr create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList); static std::shared_ptr createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue); - InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, + InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, bool regularCmdList, bool atomicDeviceSignalling); NEO::GraphicsAllocation *getDeviceCounterAllocation() const; - NEO::GraphicsAllocation *getHostCounterAllocation() const { return hostCounterAllocation; } + NEO::GraphicsAllocation *getHostCounterAllocation() const; uint64_t *getBaseHostAddress() const { return hostAddress; } uint64_t getBaseDeviceAddress() const { return deviceAddress; } + uint64_t getBaseHostGpuAddress() const; uint64_t getCounterValue() const { return counterValue; } void addCounterValue(uint64_t addValue) { counterValue += addValue; } @@ -82,7 +83,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass { protected: NEO::MemoryManager &memoryManager; NEO::TagNodeBase *deviceCounterNode = nullptr; - NEO::GraphicsAllocation *hostCounterAllocation = nullptr; + NEO::TagNodeBase *hostCounterNode = nullptr; uint64_t counterValue = 0; uint64_t regularCmdListSubmissionCounter = 0; uint64_t deviceAddress = 0; diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index a437db52e0..a7cb2e069c 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -75,20 +75,20 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) { MockDevice mockDevice; - MockTagAllocator> tagAllocator(0, mockDevice.getMemoryManager()); + MockTagAllocator> deviceTagAllocator(0, mockDevice.getMemoryManager()); + MockTagAllocator> hostTagAllocator(0, mockDevice.getMemoryManager()); auto &memoryManager = *mockDevice.getMemoryManager(); - auto tempNode = tagAllocator.getTag(); - - uint64_t storage2[2] = {1, 1}; + auto tempNode1 = deviceTagAllocator.getTag(); + auto tempNode2 = hostTagAllocator.getTag(); { - auto deviceNode = tagAllocator.getTag(); + auto deviceNode = deviceTagAllocator.getTag(); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getGpuAddress(), deviceNode->getGpuAddress()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getUnderlyingBuffer(), deviceNode->getCpuBase()); - auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); + auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, nullptr, mockDevice, 2, false); EXPECT_EQ(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getDeviceCounterAllocation()); @@ -103,7 +103,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn } { - auto deviceNode = tagAllocator.getTag(); + auto deviceNode = deviceTagAllocator.getTag(); InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, true); EXPECT_TRUE(inOrderExecInfo.isRegularCmdList()); @@ -113,26 +113,29 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn } { - auto deviceNode = tagAllocator.getTag(); + auto deviceNode = deviceTagAllocator.getTag(); + auto hostNode = hostTagAllocator.getTag(); + auto offset = ptrDiff(hostNode->getCpuBase(), tempNode2->getCpuBase()); DebugManagerStateRestore restore; debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); - auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); + auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, 2, false); + EXPECT_EQ(inOrderExecInfo->getBaseHostGpuAddress(), hostNode->getGpuAddress()); EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); - EXPECT_EQ(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), inOrderExecInfo->getBaseHostAddress()); + EXPECT_EQ(ptrOffset(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), offset), inOrderExecInfo->getBaseHostAddress()); EXPECT_TRUE(inOrderExecInfo->isHostStorageDuplicated()); } { - auto deviceNode = tagAllocator.getTag(); + auto deviceNode = deviceTagAllocator.getTag(); + auto hostNode = hostTagAllocator.getTag(); - auto hostSyncAllocation = new MockGraphicsAllocation(&storage2, sizeof(storage2)); - InOrderExecInfo inOrderExecInfo(deviceNode, hostSyncAllocation, memoryManager, 1, 0, false, false); + InOrderExecInfo inOrderExecInfo(deviceNode, hostNode, memoryManager, 1, 0, false, false); auto deviceAllocHostAddress = reinterpret_cast(deviceNode->getCpuBase()); EXPECT_EQ(0u, inOrderExecInfo.getCounterValue()); EXPECT_EQ(0u, inOrderExecInfo.getRegularCmdListSubmissionCounter()); @@ -160,7 +163,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn } { - auto deviceNode = tagAllocator.getTag(); + auto deviceNode = deviceTagAllocator.getTag(); InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, false); @@ -175,7 +178,8 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn EXPECT_EQ(4u, InOrderPatchCommandHelpers::getAppendCounterValue(inOrderExecInfo)); } - tempNode->returnTag(); + tempNode1->returnTag(); + tempNode2->returnTag(); } HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {