feature: in-order host counter allocation pooling

Related-To: NEO-10507

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2024-03-05 16:52:05 +00:00 committed by Compute-Runtime-Automation
parent 4af5851778
commit 2f03c48c7a
12 changed files with 137 additions and 68 deletions

View File

@ -2575,7 +2575,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
} }
if (inOrderExecInfo->isHostStorageDuplicated()) { if (inOrderExecInfo->isHostStorageDuplicated()) {
appendSdiInOrderCounterSignalling(inOrderExecInfo->getHostCounterAllocation()->getGpuAddress(), signalValue); appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue);
} }
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1) && signalEvent && signalEvent->isInterruptModeEnabled()) { if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1) && signalEvent && signalEvent->isInterruptModeEnabled()) {

View File

@ -259,8 +259,15 @@ void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderExecInfo.get()); UNRECOVERABLE_IF(inOrderExecInfo.get());
auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag(); auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag();
NEO::TagNodeBase *hostCounterNode = nullptr;
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType()); auto &gfxCoreHelper = device->getGfxCoreHelper();
if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device->getNEODevice()->getRootDeviceEnvironment())) {
hostCounterNode = this->device->getHostInOrderCounterAllocator()->getTag();
}
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, hostCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType());
} }
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) { void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {

View File

@ -21,26 +21,34 @@ NEO::DebuggerL0 *Device::getL0Debugger() {
return getNEODevice()->getL0Debugger(); return getNEODevice()->getL0Debugger();
} }
NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() { template <typename NodeT>
if (!deviceInOrderCounterAllocator.get()) { NEO::TagAllocatorBase *getInOrderCounterAllocator(std::unique_ptr<NEO::TagAllocatorBase> &allocator, std::mutex &inOrderAllocatorMutex, NEO::Device &neoDevice) {
if (!allocator.get()) {
std::unique_lock<std::mutex> lock(inOrderAllocatorMutex); std::unique_lock<std::mutex> lock(inOrderAllocatorMutex);
if (!deviceInOrderCounterAllocator.get()) { if (!allocator.get()) {
using NodeT = typename NEO::DeviceAllocNodeType<true>; RootDeviceIndicesContainer rootDeviceIndices = {neoDevice.getRootDeviceIndex()};
RootDeviceIndicesContainer rootDeviceIndices = {getRootDeviceIndex()};
const size_t maxPartitionCount = getNEODevice()->getDeviceBitfield().count(); const size_t maxPartitionCount = neoDevice.getDeviceBitfield().count();
const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // Multiplied by 2 to handle 32b overflow const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // Multiplied by 2 to handle 32b overflow
DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k); DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k);
deviceInOrderCounterAllocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice->getMemoryManager(), NodeT::defaultAllocatorTagCount, allocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice.getMemoryManager(), NodeT::defaultAllocatorTagCount,
MemoryConstants::cacheLineSize, nodeSize, false, neoDevice->getDeviceBitfield()); MemoryConstants::cacheLineSize, nodeSize, false, neoDevice.getDeviceBitfield());
} }
} }
return deviceInOrderCounterAllocator.get(); return allocator.get();
}
// Hands out the tag allocator used for device-side in-order counters.
// Creation is delegated to getInOrderCounterAllocator(), which builds the
// allocator on first use while holding inOrderAllocatorMutex.
NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() {
    using DeviceCounterNodeT = NEO::DeviceAllocNodeType<true>;
    NEO::Device &owningNeoDevice = *getNEODevice();
    return getInOrderCounterAllocator<DeviceCounterNodeT>(deviceInOrderCounterAllocator, inOrderAllocatorMutex, owningNeoDevice);
}
// Host-side counterpart of getDeviceInOrderCounterAllocator(): forwards
// hostInOrderCounterAllocator to the shared getInOrderCounterAllocator() helper,
// instantiated with the DeviceAllocNodeType<false> node type. The same
// inOrderAllocatorMutex is passed as for the device-side allocator.
NEO::TagAllocatorBase *Device::getHostInOrderCounterAllocator() {
return getInOrderCounterAllocator<NEO::DeviceAllocNodeType<false>>(hostInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice());
} }
} // namespace L0 } // namespace L0

View File

@ -150,10 +150,12 @@ struct Device : _ze_device_handle_t {
virtual uint32_t getEventMaxPacketCount() const = 0; virtual uint32_t getEventMaxPacketCount() const = 0;
virtual uint32_t getEventMaxKernelCount() const = 0; virtual uint32_t getEventMaxKernelCount() const = 0;
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator(); NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
protected: protected:
NEO::Device *neoDevice = nullptr; NEO::Device *neoDevice = nullptr;
std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator; std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator;
std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator;
std::mutex inOrderAllocatorMutex; std::mutex inOrderAllocatorMutex;
bool implicitScalingCapable = false; bool implicitScalingCapable = false;
}; };

View File

@ -1468,6 +1468,7 @@ void DeviceImp::releaseResources() {
builtins.reset(); builtins.reset();
cacheReservation.reset(); cacheReservation.reset();
deviceInOrderCounterAllocator.reset(); deviceInOrderCounterAllocator.reset();
hostInOrderCounterAllocator.reset();
if (allocationsForReuse.get()) { if (allocationsForReuse.get()) {
allocationsForReuse->freeAllGraphicsAllocations(neoDevice); allocationsForReuse->freeAllGraphicsAllocations(neoDevice);

View File

@ -105,6 +105,7 @@ struct MockDeviceImp : public L0::DeviceImp {
using Base::debugSession; using Base::debugSession;
using Base::deviceInOrderCounterAllocator; using Base::deviceInOrderCounterAllocator;
using Base::getNEODevice; using Base::getNEODevice;
using Base::hostInOrderCounterAllocator;
using Base::implicitScalingCapable; using Base::implicitScalingCapable;
using Base::neoDevice; using Base::neoDevice;
using Base::subDeviceCopyEngineGroups; using Base::subDeviceCopyEngineGroups;

View File

@ -396,6 +396,39 @@ HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseDeviceAlloc,
tag->returnTag(); tag->returnTag();
} }
// Checks that host in-order counter storage comes from the device's host tag
// allocator and that a released slot is handed to the next command list.
HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseHostAlloc, IsAtLeastSkl) {
    debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);

    // Hold one node for the whole test so the backing allocation can be compared below.
    auto reservedHostNode = device->getHostInOrderCounterAllocator()->getTag();

    auto firstCmdList = createImmCmdList<gfxCoreFamily>();
    auto firstHostAddress = firstCmdList->inOrderExecInfo->getBaseHostAddress();

    auto secondCmdList = createImmCmdList<gfxCoreFamily>();
    auto secondHostAddress = secondCmdList->inOrderExecInfo->getBaseHostAddress();

    // Two live command lists must not share a counter slot.
    EXPECT_NE(firstHostAddress, secondHostAddress);

    // allocation from the same allocator
    auto firstListHostAllocation = firstCmdList->inOrderExecInfo->getHostCounterAllocation();
    auto reservedNodeAllocation = reservedHostNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0);
    EXPECT_EQ(firstListHostAllocation, reservedNodeAllocation);

    // Destroying the first list is expected to free its slot for the next list.
    firstCmdList.reset();

    auto thirdCmdList = createImmCmdList<gfxCoreFamily>();
    auto thirdHostAddress = thirdCmdList->inOrderExecInfo->getBaseHostAddress();
    EXPECT_EQ(firstHostAddress, thirdHostAddress);

    // Same expectation for the slot previously owned by the second list.
    secondCmdList.reset();

    auto fourthCmdList = createImmCmdList<gfxCoreFamily>();
    auto fourthHostAddress = fourthCmdList->inOrderExecInfo->getBaseHostAddress();
    EXPECT_EQ(secondHostAddress, fourthHostAddress);

    reservedHostNode->returnTag();
}
HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenReturnError, IsAtLeastSkl) { HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenReturnError, IsAtLeastSkl) {
auto immCmdList = createImmCmdList<gfxCoreFamily>(); auto immCmdList = createImmCmdList<gfxCoreFamily>();
@ -1573,8 +1606,13 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll
EXPECT_EQ(immCmdList1->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer()); EXPECT_EQ(immCmdList1->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer());
EXPECT_FALSE(immCmdList1->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool()); EXPECT_FALSE(immCmdList1->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool());
EXPECT_EQ(immCmdList1->inOrderExecInfo->getHostCounterAllocation(), immCmdList2->inOrderExecInfo->getHostCounterAllocation());
auto hostAllocOffset = ptrDiff(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getBaseHostAddress());
EXPECT_NE(0u, hostAllocOffset);
EXPECT_EQ(AllocationType::bufferHostMemory, immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getAllocationType()); EXPECT_EQ(AllocationType::bufferHostMemory, immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getAllocationType());
EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer()); EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), ptrOffset(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), hostAllocOffset));
EXPECT_FALSE(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool()); EXPECT_FALSE(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool());
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false); immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false);
@ -1583,8 +1621,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll
immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false); immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false);
// host allocation not used as Device dependency EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]);
} }
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEventCsrList, IsAtLeastSkl) { HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEventCsrList, IsAtLeastSkl) {

View File

@ -837,14 +837,19 @@ TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelAndHostPointerC
TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAndHandleDestruction) { TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAndHandleDestruction) {
uint32_t destructorId = 0u; uint32_t destructorId = 0u;
class MyMockDevice : public DestructorCounted<NEO::MockDevice, 1> { class MockDeviceTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 0> {
public: public:
MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {} MockDeviceTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {}
}; };
class MyMockTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 0> { class MockHostTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 1> {
public: public:
MyMockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {} MockHostTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {}
};
class MyMockDevice : public DestructorCounted<NEO::MockDevice, 2> {
public:
MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {}
}; };
const uint32_t rootDeviceIndex = 0u; const uint32_t rootDeviceIndex = 0u;
@ -856,11 +861,15 @@ TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAn
neoMockDevice->createDeviceImpl(); neoMockDevice->createDeviceImpl();
{ {
auto allocator = new MyMockTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId); auto deviceAllocator = new MockDeviceTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId);
auto hostAllocator = new MockHostTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId);
MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment()); MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());
deviceImp.deviceInOrderCounterAllocator.reset(allocator); deviceImp.deviceInOrderCounterAllocator.reset(deviceAllocator);
EXPECT_EQ(allocator, deviceImp.getDeviceInOrderCounterAllocator()); deviceImp.hostInOrderCounterAllocator.reset(hostAllocator);
EXPECT_EQ(deviceAllocator, deviceImp.getDeviceInOrderCounterAllocator());
EXPECT_EQ(hostAllocator, deviceImp.getHostInOrderCounterAllocator());
} }
} }

View File

@ -3425,7 +3425,8 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface =
std::make_unique<NEO::MockMemoryOperations>(); std::make_unique<NEO::MockMemoryOperations>();
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, neoDevice->getMemoryManager()); MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, neoDevice->getMemoryManager());
auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device))); auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
@ -3440,14 +3441,12 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh
downloadAllocationTrack[&gfxAllocation]++; downloadAllocationTrack[&gfxAllocation]++;
}; };
uint64_t storage2[2] = {1, 1}; auto deviceMockNode = deviceTagAllocator.getTag();
auto hostMockNode = hostTagAllocator.getTag();
auto deviceSyncAllocation = deviceMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto hostSyncAllocation = hostMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto hostSyncAllocation = new NEO::MockGraphicsAllocation(&storage2, sizeof(storage2)); auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(deviceMockNode, hostMockNode, *neoDevice->getMemoryManager(), 1, 0, false, false);
auto mockNode = tagAllocator.getTag();
auto deviceSyncAllocation = mockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(mockNode, hostSyncAllocation, *neoDevice->getMemoryManager(), 1, 0, false, false);
*inOrderExecInfo->getBaseHostAddress() = 1; *inOrderExecInfo->getBaseHostAddress() = 1;
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE); event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);

View File

@ -19,22 +19,12 @@
namespace NEO { namespace NEO {
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) { std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) {
NEO::GraphicsAllocation *hostCounterAllocation = nullptr; bool atomicDeviceSignalling = device.getGfxCoreHelper().inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment());
auto &gfxCoreHelper = device.getGfxCoreHelper();
bool atomicDeviceSignalling = gfxCoreHelper.inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment());
if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device.getRootDeviceEnvironment())) {
NEO::AllocationProperties hostAllocationProperties{device.getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::bufferHostMemory, device.getDeviceBitfield()};
hostCounterAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(hostAllocationProperties);
UNRECOVERABLE_IF(!hostCounterAllocation);
}
UNRECOVERABLE_IF(!deviceCounterNode); UNRECOVERABLE_IF(!deviceCounterNode);
return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterAllocation, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling); return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterNode, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling);
} }
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue) { std::shared_ptr<InOrderExecInfo> InOrderExecInfo::createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue) {
@ -52,19 +42,21 @@ InOrderExecInfo::~InOrderExecInfo() {
if (deviceCounterNode) { if (deviceCounterNode) {
deviceCounterNode->returnTag(); deviceCounterNode->returnTag();
} }
memoryManager.freeGraphicsMemory(hostCounterAllocation); if (hostCounterNode) {
hostCounterNode->returnTag();
}
} }
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling) bool regularCmdList, bool atomicDeviceSignalling)
: memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterAllocation(hostCounterAllocation), rootDeviceIndex(rootDeviceIndex), : memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterNode(hostCounterNode), rootDeviceIndex(rootDeviceIndex),
regularCmdList(regularCmdList), atomicDeviceSignalling(atomicDeviceSignalling) { regularCmdList(regularCmdList), atomicDeviceSignalling(atomicDeviceSignalling) {
numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount; numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount;
numHostPartitionsToWait = partitionCount; numHostPartitionsToWait = partitionCount;
if (hostCounterAllocation) { if (hostCounterNode) {
hostAddress = reinterpret_cast<uint64_t *>(hostCounterAllocation->getUnderlyingBuffer()); hostAddress = reinterpret_cast<uint64_t *>(hostCounterNode->getCpuBase());
duplicatedHostStorage = true; duplicatedHostStorage = true;
} else if (deviceCounterNode) { } else if (deviceCounterNode) {
hostAddress = reinterpret_cast<uint64_t *>(deviceCounterNode->getCpuBase()); hostAddress = reinterpret_cast<uint64_t *>(deviceCounterNode->getCpuBase());
@ -83,9 +75,9 @@ void InOrderExecInfo::initializeAllocationsFromHost() {
memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize); memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize);
} }
if (hostCounterAllocation) { if (hostCounterNode) {
const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait; const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait;
memset(ptrOffset(hostCounterAllocation->getUnderlyingBuffer(), allocationOffset), 0, hostAllocationWriteSize); memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize);
} }
} }
@ -101,4 +93,12 @@ NEO::GraphicsAllocation *InOrderExecInfo::getDeviceCounterAllocation() const {
return deviceCounterNode ? deviceCounterNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex) : nullptr; return deviceCounterNode ? deviceCounterNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex) : nullptr;
} }
// Resolves the graphics allocation that backs the host counter node for this
// object's rootDeviceIndex. Yields nullptr when no host counter node is attached.
NEO::GraphicsAllocation *InOrderExecInfo::getHostCounterAllocation() const {
    if (!hostCounterNode) {
        return nullptr;
    }

    auto backingAllocations = hostCounterNode->getBaseGraphicsAllocation();
    return backingAllocations->getGraphicsAllocation(rootDeviceIndex);
}
// Returns the GPU address of the host counter node, or 0 when this object has
// no host counter node.
uint64_t InOrderExecInfo::getBaseHostGpuAddress() const {
    // hostCounterNode defaults to nullptr; guard the dereference like
    // getHostCounterAllocation() / getDeviceCounterAllocation() do, so a call
    // without host storage cannot dereference a null pointer.
    return hostCounterNode ? hostCounterNode->getGpuAddress() : 0u;
}
} // namespace NEO } // namespace NEO

View File

@ -48,16 +48,17 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
InOrderExecInfo() = delete; InOrderExecInfo() = delete;
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList); static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);
static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue); static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue);
InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex, InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling); bool regularCmdList, bool atomicDeviceSignalling);
NEO::GraphicsAllocation *getDeviceCounterAllocation() const; NEO::GraphicsAllocation *getDeviceCounterAllocation() const;
NEO::GraphicsAllocation *getHostCounterAllocation() const { return hostCounterAllocation; } NEO::GraphicsAllocation *getHostCounterAllocation() const;
uint64_t *getBaseHostAddress() const { return hostAddress; } uint64_t *getBaseHostAddress() const { return hostAddress; }
uint64_t getBaseDeviceAddress() const { return deviceAddress; } uint64_t getBaseDeviceAddress() const { return deviceAddress; }
uint64_t getBaseHostGpuAddress() const;
uint64_t getCounterValue() const { return counterValue; } uint64_t getCounterValue() const { return counterValue; }
void addCounterValue(uint64_t addValue) { counterValue += addValue; } void addCounterValue(uint64_t addValue) { counterValue += addValue; }
@ -82,7 +83,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
protected: protected:
NEO::MemoryManager &memoryManager; NEO::MemoryManager &memoryManager;
NEO::TagNodeBase *deviceCounterNode = nullptr; NEO::TagNodeBase *deviceCounterNode = nullptr;
NEO::GraphicsAllocation *hostCounterAllocation = nullptr; NEO::TagNodeBase *hostCounterNode = nullptr;
uint64_t counterValue = 0; uint64_t counterValue = 0;
uint64_t regularCmdListSubmissionCounter = 0; uint64_t regularCmdListSubmissionCounter = 0;
uint64_t deviceAddress = 0; uint64_t deviceAddress = 0;

View File

@ -75,20 +75,20 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO
HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) { HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecInfoThenSetupCorrectly) {
MockDevice mockDevice; MockDevice mockDevice;
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, mockDevice.getMemoryManager()); MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, mockDevice.getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, mockDevice.getMemoryManager());
auto &memoryManager = *mockDevice.getMemoryManager(); auto &memoryManager = *mockDevice.getMemoryManager();
auto tempNode = tagAllocator.getTag(); auto tempNode1 = deviceTagAllocator.getTag();
auto tempNode2 = hostTagAllocator.getTag();
uint64_t storage2[2] = {1, 1};
{ {
auto deviceNode = tagAllocator.getTag(); auto deviceNode = deviceTagAllocator.getTag();
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getGpuAddress(), deviceNode->getGpuAddress()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getGpuAddress(), deviceNode->getGpuAddress());
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getUnderlyingBuffer(), deviceNode->getCpuBase()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation()->getUnderlyingBuffer(), deviceNode->getCpuBase());
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, nullptr, mockDevice, 2, false);
EXPECT_EQ(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress());
EXPECT_EQ(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getDeviceCounterAllocation()); EXPECT_EQ(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getDeviceCounterAllocation());
@ -103,7 +103,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
} }
{ {
auto deviceNode = tagAllocator.getTag(); auto deviceNode = deviceTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, true); InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, true);
EXPECT_TRUE(inOrderExecInfo.isRegularCmdList()); EXPECT_TRUE(inOrderExecInfo.isRegularCmdList());
@ -113,26 +113,29 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
} }
{ {
auto deviceNode = tagAllocator.getTag(); auto deviceNode = deviceTagAllocator.getTag();
auto hostNode = hostTagAllocator.getTag();
auto offset = ptrDiff(hostNode->getCpuBase(), tempNode2->getCpuBase());
DebugManagerStateRestore restore; DebugManagerStateRestore restore;
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, mockDevice, 2, false); auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, 2, false);
EXPECT_EQ(inOrderExecInfo->getBaseHostGpuAddress(), hostNode->getGpuAddress());
EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation());
EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation());
EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress());
EXPECT_EQ(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(ptrOffset(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), offset), inOrderExecInfo->getBaseHostAddress());
EXPECT_TRUE(inOrderExecInfo->isHostStorageDuplicated()); EXPECT_TRUE(inOrderExecInfo->isHostStorageDuplicated());
} }
{ {
auto deviceNode = tagAllocator.getTag(); auto deviceNode = deviceTagAllocator.getTag();
auto hostNode = hostTagAllocator.getTag();
auto hostSyncAllocation = new MockGraphicsAllocation(&storage2, sizeof(storage2)); InOrderExecInfo inOrderExecInfo(deviceNode, hostNode, memoryManager, 1, 0, false, false);
InOrderExecInfo inOrderExecInfo(deviceNode, hostSyncAllocation, memoryManager, 1, 0, false, false);
auto deviceAllocHostAddress = reinterpret_cast<uint64_t *>(deviceNode->getCpuBase()); auto deviceAllocHostAddress = reinterpret_cast<uint64_t *>(deviceNode->getCpuBase());
EXPECT_EQ(0u, inOrderExecInfo.getCounterValue()); EXPECT_EQ(0u, inOrderExecInfo.getCounterValue());
EXPECT_EQ(0u, inOrderExecInfo.getRegularCmdListSubmissionCounter()); EXPECT_EQ(0u, inOrderExecInfo.getRegularCmdListSubmissionCounter());
@ -160,7 +163,7 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
} }
{ {
auto deviceNode = tagAllocator.getTag(); auto deviceNode = deviceTagAllocator.getTag();
InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, false); InOrderExecInfo inOrderExecInfo(deviceNode, nullptr, memoryManager, 2, 0, true, false);
@ -175,7 +178,8 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn
EXPECT_EQ(4u, InOrderPatchCommandHelpers::getAppendCounterValue(inOrderExecInfo)); EXPECT_EQ(4u, InOrderPatchCommandHelpers::getAppendCounterValue(inOrderExecInfo));
} }
tempNode->returnTag(); tempNode1->returnTag();
tempNode2->returnTag();
} }
HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) { HWTEST_F(CommandEncoderTests, givenInOrderExecInfoWhenPatchingThenSetCorrectValues) {