feature: in-order host counter allocation pooling 2

Related-To: NEO-10507

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2024-03-13 15:48:10 +00:00
committed by Compute-Runtime-Automation
parent 78e55f31b6
commit e400a5c624
12 changed files with 137 additions and 68 deletions

View File

@@ -19,22 +19,12 @@
namespace NEO {
// NOTE(review): this listing appears to interleave the pre-change and post-change
// lines of create() from a diff whose +/- markers are not shown - confirm against the
// repository before relying on the exact line order.
//
// Earlier variant: receives only the device counter node and obtains the host counter
// storage itself.
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) {
NEO::GraphicsAllocation *hostCounterAllocation = nullptr;
auto &gfxCoreHelper = device.getGfxCoreHelper();
bool atomicDeviceSignalling = gfxCoreHelper.inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment());
// Only when the core helper reports duplicated in-order counter storage is a separate
// bufferHostMemory allocation of MemoryConstants::pageSize64k requested from the
// memory manager; a null result is passed to UNRECOVERABLE_IF.
if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device.getRootDeviceEnvironment())) {
NEO::AllocationProperties hostAllocationProperties{device.getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::bufferHostMemory, device.getDeviceBitfield()};
hostCounterAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties(hostAllocationProperties);
UNRECOVERABLE_IF(!hostCounterAllocation);
}
// Later variant: the caller supplies hostCounterNode, which is forwarded to the
// constructor without a null check here; no host memory is allocated in this function.
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList) {
bool atomicDeviceSignalling = device.getGfxCoreHelper().inOrderAtomicSignallingEnabled(device.getRootDeviceEnvironment());
// A device counter node is mandatory for this factory.
UNRECOVERABLE_IF(!deviceCounterNode);
// The two return statements below differ only in the second argument
// (hostCounterAllocation vs. hostCounterNode).
return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterAllocation, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling);
return std::make_shared<NEO::InOrderExecInfo>(deviceCounterNode, hostCounterNode, *device.getMemoryManager(), partitionCount, device.getRootDeviceIndex(), regularCmdList, atomicDeviceSignalling);
}
std::shared_ptr<InOrderExecInfo> InOrderExecInfo::createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue) {
@@ -52,19 +42,21 @@ InOrderExecInfo::~InOrderExecInfo() {
if (deviceCounterNode) {
deviceCounterNode->returnTag();
}
memoryManager.freeGraphicsMemory(hostCounterAllocation);
if (hostCounterNode) {
hostCounterNode->returnTag();
}
}
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
InOrderExecInfo::InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling)
: memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterAllocation(hostCounterAllocation), rootDeviceIndex(rootDeviceIndex),
: memoryManager(memoryManager), deviceCounterNode(deviceCounterNode), hostCounterNode(hostCounterNode), rootDeviceIndex(rootDeviceIndex),
regularCmdList(regularCmdList), atomicDeviceSignalling(atomicDeviceSignalling) {
numDevicePartitionsToWait = atomicDeviceSignalling ? 1 : partitionCount;
numHostPartitionsToWait = partitionCount;
if (hostCounterAllocation) {
hostAddress = reinterpret_cast<uint64_t *>(hostCounterAllocation->getUnderlyingBuffer());
if (hostCounterNode) {
hostAddress = reinterpret_cast<uint64_t *>(hostCounterNode->getCpuBase());
duplicatedHostStorage = true;
} else if (deviceCounterNode) {
hostAddress = reinterpret_cast<uint64_t *>(deviceCounterNode->getCpuBase());
@@ -83,9 +75,9 @@ void InOrderExecInfo::initializeAllocationsFromHost() {
memset(ptrOffset(deviceCounterNode->getCpuBase(), allocationOffset), 0, deviceAllocationWriteSize);
}
if (hostCounterAllocation) {
if (hostCounterNode) {
const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait;
memset(ptrOffset(hostCounterAllocation->getUnderlyingBuffer(), allocationOffset), 0, hostAllocationWriteSize);
memset(ptrOffset(hostCounterNode->getCpuBase(), allocationOffset), 0, hostAllocationWriteSize);
}
}
@@ -101,4 +93,12 @@ NEO::GraphicsAllocation *InOrderExecInfo::getDeviceCounterAllocation() const {
return deviceCounterNode ? deviceCounterNode->getBaseGraphicsAllocation()->getGraphicsAllocation(rootDeviceIndex) : nullptr;
}
// Looks up the graphics allocation behind the host counter node for this object's
// root device index. Yields nullptr when no host counter node is attached.
NEO::GraphicsAllocation *InOrderExecInfo::getHostCounterAllocation() const {
    if (!hostCounterNode) {
        return nullptr;
    }

    auto baseAllocation = hostCounterNode->getBaseGraphicsAllocation();
    return baseAllocation->getGraphicsAllocation(rootDeviceIndex);
}
// Returns the GPU address of the host counter node, or 0 when no host counter node is
// attached. hostCounterNode is optional in this class (the constructor, destructor and
// getHostCounterAllocation() all null-check it), so it is guarded here as well instead
// of being dereferenced unconditionally.
uint64_t InOrderExecInfo::getBaseHostGpuAddress() const {
    return hostCounterNode ? hostCounterNode->getGpuAddress() : 0;
}
} // namespace NEO

View File

@@ -48,16 +48,17 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
InOrderExecInfo() = delete;
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);
static std::shared_ptr<InOrderExecInfo> create(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::Device &device, uint32_t partitionCount, bool regularCmdList);
static std::shared_ptr<InOrderExecInfo> createFromExternalAllocation(NEO::Device &device, uint64_t deviceAddress, uint64_t *hostAddress, uint64_t counterValue);
InOrderExecInfo(TagNodeBase *deviceCounterNode, NEO::GraphicsAllocation *hostCounterAllocation, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
InOrderExecInfo(TagNodeBase *deviceCounterNode, TagNodeBase *hostCounterNode, NEO::MemoryManager &memoryManager, uint32_t partitionCount, uint32_t rootDeviceIndex,
bool regularCmdList, bool atomicDeviceSignalling);
NEO::GraphicsAllocation *getDeviceCounterAllocation() const;
NEO::GraphicsAllocation *getHostCounterAllocation() const { return hostCounterAllocation; }
NEO::GraphicsAllocation *getHostCounterAllocation() const;
uint64_t *getBaseHostAddress() const { return hostAddress; }
uint64_t getBaseDeviceAddress() const { return deviceAddress; }
uint64_t getBaseHostGpuAddress() const;
uint64_t getCounterValue() const { return counterValue; }
void addCounterValue(uint64_t addValue) { counterValue += addValue; }
@@ -82,7 +83,7 @@ class InOrderExecInfo : public NEO::NonCopyableClass {
protected:
NEO::MemoryManager &memoryManager;
NEO::TagNodeBase *deviceCounterNode = nullptr;
NEO::GraphicsAllocation *hostCounterAllocation = nullptr;
NEO::TagNodeBase *hostCounterNode = nullptr;
uint64_t counterValue = 0;
uint64_t regularCmdListSubmissionCounter = 0;
uint64_t deviceAddress = 0;