Revert "feature: in-order host counter allocation pooling"

This reverts commit 2f03c48c7a.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2024-03-09 19:18:46 +01:00
committed by Compute-Runtime-Automation
parent 0d00c8bab8
commit 4082e9f028
12 changed files with 70 additions and 139 deletions

View File

@@ -2575,7 +2575,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
}
if (inOrderExecInfo->isHostStorageDuplicated()) {
appendSdiInOrderCounterSignalling(inOrderExecInfo->getBaseHostGpuAddress(), signalValue);
appendSdiInOrderCounterSignalling(inOrderExecInfo->getHostCounterAllocation()->getGpuAddress(), signalValue);
}
if ((NEO::debugManager.flags.ProgramUserInterruptOnResolvedDependency.get() == 1) && signalEvent && signalEvent->isInterruptModeEnabled()) {

View File

@@ -259,15 +259,8 @@ void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderExecInfo.get());
auto deviceCounterNode = this->device->getDeviceInOrderCounterAllocator()->getTag();
NEO::TagNodeBase *hostCounterNode = nullptr;
auto &gfxCoreHelper = device->getGfxCoreHelper();
if (gfxCoreHelper.duplicatedInOrderCounterStorageEnabled(device->getNEODevice()->getRootDeviceEnvironment())) {
hostCounterNode = this->device->getHostInOrderCounterAllocator()->getTag();
}
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, hostCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType());
inOrderExecInfo = NEO::InOrderExecInfo::create(deviceCounterNode, *this->device->getNEODevice(), this->partitionCount, !isImmediateType());
}
void CommandListImp::storeReferenceTsToMappedEvents(bool isClearEnabled) {

View File

@@ -21,34 +21,26 @@ NEO::DebuggerL0 *Device::getL0Debugger() {
return getNEODevice()->getL0Debugger();
}
template <typename NodeT>
NEO::TagAllocatorBase *getInOrderCounterAllocator(std::unique_ptr<NEO::TagAllocatorBase> &allocator, std::mutex &inOrderAllocatorMutex, NEO::Device &neoDevice) {
if (!allocator.get()) {
NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() {
if (!deviceInOrderCounterAllocator.get()) {
std::unique_lock<std::mutex> lock(inOrderAllocatorMutex);
if (!allocator.get()) {
RootDeviceIndicesContainer rootDeviceIndices = {neoDevice.getRootDeviceIndex()};
if (!deviceInOrderCounterAllocator.get()) {
using NodeT = typename NEO::DeviceAllocNodeType<true>;
RootDeviceIndicesContainer rootDeviceIndices = {getRootDeviceIndex()};
const size_t maxPartitionCount = neoDevice.getDeviceBitfield().count();
const size_t maxPartitionCount = getNEODevice()->getDeviceBitfield().count();
const size_t nodeSize = sizeof(uint64_t) * maxPartitionCount * 2; // Multiplied by 2 to handle 32b overflow
DEBUG_BREAK_IF(alignUp(nodeSize, MemoryConstants::cacheLineSize) * NodeT::defaultAllocatorTagCount > MemoryConstants::pageSize64k);
allocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice.getMemoryManager(), NodeT::defaultAllocatorTagCount,
MemoryConstants::cacheLineSize, nodeSize, false, neoDevice.getDeviceBitfield());
deviceInOrderCounterAllocator = std::make_unique<NEO::TagAllocator<NodeT>>(rootDeviceIndices, neoDevice->getMemoryManager(), NodeT::defaultAllocatorTagCount,
MemoryConstants::cacheLineSize, nodeSize, false, neoDevice->getDeviceBitfield());
}
}
return allocator.get();
}
// Returns the allocator stored in deviceInOrderCounterAllocator by delegating to the
// getInOrderCounterAllocator helper, instantiated with NEO::DeviceAllocNodeType<true>.
// The helper receives the cached allocator, inOrderAllocatorMutex and the NEO device.
NEO::TagAllocatorBase *Device::getDeviceInOrderCounterAllocator() {
return getInOrderCounterAllocator<NEO::DeviceAllocNodeType<true>>(deviceInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice());
}
NEO::TagAllocatorBase *Device::getHostInOrderCounterAllocator() {
return getInOrderCounterAllocator<NEO::DeviceAllocNodeType<false>>(hostInOrderCounterAllocator, inOrderAllocatorMutex, *getNEODevice());
return deviceInOrderCounterAllocator.get();
}
} // namespace L0

View File

@@ -150,12 +150,10 @@ struct Device : _ze_device_handle_t {
virtual uint32_t getEventMaxPacketCount() const = 0;
virtual uint32_t getEventMaxKernelCount() const = 0;
NEO::TagAllocatorBase *getDeviceInOrderCounterAllocator();
NEO::TagAllocatorBase *getHostInOrderCounterAllocator();
protected:
NEO::Device *neoDevice = nullptr;
std::unique_ptr<NEO::TagAllocatorBase> deviceInOrderCounterAllocator;
std::unique_ptr<NEO::TagAllocatorBase> hostInOrderCounterAllocator;
std::mutex inOrderAllocatorMutex;
bool implicitScalingCapable = false;
};

View File

@@ -1467,7 +1467,6 @@ void DeviceImp::releaseResources() {
builtins.reset();
cacheReservation.reset();
deviceInOrderCounterAllocator.reset();
hostInOrderCounterAllocator.reset();
if (allocationsForReuse.get()) {
allocationsForReuse->freeAllGraphicsAllocations(neoDevice);

View File

@@ -105,7 +105,6 @@ struct MockDeviceImp : public L0::DeviceImp {
using Base::debugSession;
using Base::deviceInOrderCounterAllocator;
using Base::getNEODevice;
using Base::hostInOrderCounterAllocator;
using Base::implicitScalingCapable;
using Base::neoDevice;
using Base::subDeviceCopyEngineGroups;

View File

@@ -396,39 +396,6 @@ HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseDeviceAlloc,
tag->returnTag();
}
// Checks that host in-order counter storage handed to immediate command lists comes from the
// device's host counter allocator and that a released slot is handed out again to the next list.
HWTEST2_F(InOrderCmdListTests, whenCreatingInOrderExecInfoThenReuseHostAlloc, IsAtLeastSkl) {
// Turn on the duplicated (host-side) counter storage for this test.
debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1);
// Take one tag up front; it is used below to identify the allocator's backing allocation.
auto tag = device->getHostInOrderCounterAllocator()->getTag();
// NOTE: despite the gpuVa* names, these variables hold the result of getBaseHostAddress().
auto immCmdList1 = createImmCmdList<gfxCoreFamily>();
auto gpuVa1 = immCmdList1->inOrderExecInfo->getBaseHostAddress();
auto immCmdList2 = createImmCmdList<gfxCoreFamily>();
auto gpuVa2 = immCmdList2->inOrderExecInfo->getBaseHostAddress();
// Two live command lists must not share the same host counter address.
EXPECT_NE(gpuVa1, gpuVa2);
// allocation from the same allocator
EXPECT_EQ(immCmdList1->inOrderExecInfo->getHostCounterAllocation(), tag->getBaseGraphicsAllocation()->getGraphicsAllocation(0));
// After the first list is reset, a newly created list is expected to get the first address again.
immCmdList1.reset();
auto immCmdList3 = createImmCmdList<gfxCoreFamily>();
auto gpuVa3 = immCmdList3->inOrderExecInfo->getBaseHostAddress();
EXPECT_EQ(gpuVa1, gpuVa3);
// Same expectation for the second list's address.
immCmdList2.reset();
auto immCmdList4 = createImmCmdList<gfxCoreFamily>();
auto gpuVa4 = immCmdList4->inOrderExecInfo->getBaseHostAddress();
EXPECT_EQ(gpuVa2, gpuVa4);
// Hand the tag taken at the start back to the allocator.
tag->returnTag();
}
HWTEST2_F(InOrderCmdListTests, givenInOrderEventWhenAppendEventResetCalledThenReturnError, IsAtLeastSkl) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();
@@ -1606,13 +1573,8 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll
EXPECT_EQ(immCmdList1->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer());
EXPECT_FALSE(immCmdList1->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool());
EXPECT_EQ(immCmdList1->inOrderExecInfo->getHostCounterAllocation(), immCmdList2->inOrderExecInfo->getHostCounterAllocation());
auto hostAllocOffset = ptrDiff(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList1->inOrderExecInfo->getBaseHostAddress());
EXPECT_NE(0u, hostAllocOffset);
EXPECT_EQ(AllocationType::bufferHostMemory, immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getAllocationType());
EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), ptrOffset(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), hostAllocOffset));
EXPECT_EQ(immCmdList2->inOrderExecInfo->getBaseHostAddress(), immCmdList2->inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer());
EXPECT_FALSE(immCmdList2->inOrderExecInfo->getHostCounterAllocation()->isAllocatedInLocalMemoryPool());
immCmdList1->appendLaunchKernel(kernel->toHandle(), groupCount, event0Handle, 0, nullptr, launchParams, false);
@@ -1621,7 +1583,8 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenDispatchingThenEnsureHostAll
immCmdList2->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &event0Handle, launchParams, false);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]);
// host allocation not used as Device dependency
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderExecInfo->getHostCounterAllocation()]);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenClearEventCsrList, IsAtLeastSkl) {

View File

@@ -837,21 +837,16 @@ TEST_F(DeviceHostPointerTest, givenHostPointerNotAcceptedByKernelAndHostPointerC
TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAndHandleDestruction) {
uint32_t destructorId = 0u;
class MockDeviceTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 0> {
public:
MockDeviceTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {}
};
class MockHostTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 1> {
public:
MockHostTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {}
};
class MyMockDevice : public DestructorCounted<NEO::MockDevice, 2> {
class MyMockDevice : public DestructorCounted<NEO::MockDevice, 1> {
public:
MyMockDevice(NEO::ExecutionEnvironment *executionEnvironment, uint32_t rootDeviceIndex, uint32_t &destructorId) : DestructorCounted(destructorId, executionEnvironment, rootDeviceIndex) {}
};
class MyMockTagAllocator : public DestructorCounted<MockTagAllocator<NEO::DeviceAllocNodeType<true>>, 0> {
public:
MyMockTagAllocator(uint32_t rootDeviceIndex, MemoryManager *memoryManager, uint32_t &destructorId) : DestructorCounted(destructorId, rootDeviceIndex, memoryManager, 10) {}
};
const uint32_t rootDeviceIndex = 0u;
auto hwInfo = *NEO::defaultHwInfo;
@@ -861,15 +856,11 @@ TEST_F(DeviceTest, whenCreatingDeviceThenCreateInOrderCounterAllocatorOnDemandAn
neoMockDevice->createDeviceImpl();
{
auto deviceAllocator = new MockDeviceTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId);
auto hostAllocator = new MockHostTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId);
auto allocator = new MyMockTagAllocator(0, neoMockDevice->getMemoryManager(), destructorId);
MockDeviceImp deviceImp(neoMockDevice, neoMockDevice->getExecutionEnvironment());
deviceImp.deviceInOrderCounterAllocator.reset(deviceAllocator);
deviceImp.hostInOrderCounterAllocator.reset(hostAllocator);
EXPECT_EQ(deviceAllocator, deviceImp.getDeviceInOrderCounterAllocator());
EXPECT_EQ(hostAllocator, deviceImp.getHostInOrderCounterAllocator());
deviceImp.deviceInOrderCounterAllocator.reset(allocator);
EXPECT_EQ(allocator, deviceImp.getDeviceInOrderCounterAllocator());
}
}

View File

@@ -3425,8 +3425,7 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface =
std::make_unique<NEO::MockMemoryOperations>();
MockTagAllocator<DeviceAllocNodeType<true>> deviceTagAllocator(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> hostTagAllocator(0, neoDevice->getMemoryManager());
MockTagAllocator<DeviceAllocNodeType<true>> tagAllocator(0, neoDevice->getMemoryManager());
auto event = zeUniquePtr(whiteboxCast(getHelper<L0GfxCoreHelper>().createEvent(eventPool.get(), &eventDesc, device)));
@@ -3441,12 +3440,14 @@ HWTEST_F(EventTests, givenInOrderEventWithHostAllocWhenHostSynchronizeIsCalledTh
downloadAllocationTrack[&gfxAllocation]++;
};
auto deviceMockNode = deviceTagAllocator.getTag();
auto hostMockNode = hostTagAllocator.getTag();
auto deviceSyncAllocation = deviceMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto hostSyncAllocation = hostMockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
uint64_t storage2[2] = {1, 1};
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(deviceMockNode, hostMockNode, *neoDevice->getMemoryManager(), 1, 0, false, false);
auto hostSyncAllocation = new NEO::MockGraphicsAllocation(&storage2, sizeof(storage2));
auto mockNode = tagAllocator.getTag();
auto deviceSyncAllocation = mockNode->getBaseGraphicsAllocation()->getDefaultGraphicsAllocation();
auto inOrderExecInfo = std::make_shared<NEO::InOrderExecInfo>(mockNode, hostSyncAllocation, *neoDevice->getMemoryManager(), 1, 0, false, false);
*inOrderExecInfo->getBaseHostAddress() = 1;
event->enableCounterBasedMode(true, ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE);