feature: Experimental support of immediate cmd list in-order execution [3/n]

New allocation to track dependencies counter

Related-To: LOCI-4332

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-04-28 10:21:20 +00:00
committed by Compute-Runtime-Automation
parent b6b331fbe2
commit ef10c98497
7 changed files with 81 additions and 11 deletions

View File

@ -170,6 +170,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
protected:
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::latestInOrderOperationCompleted;
using BaseClass::latestSentInOrderEvent;

View File

@ -702,8 +702,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
}
if (isInOrderExecutionEnabled()) {
inOrderDependencyCounter++;
latestInOrderOperationCompleted = false;
this->latestSentInOrderEvent = hSignalEvent;
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->setLatestUsedInOrderCmdList(this);
}

View File

@ -16,6 +16,7 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/sys_calls_common.h"
@ -74,6 +75,8 @@ ze_result_t CommandListImp::destroy() {
}
}
device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation);
delete this;
return ZE_RESULT_SUCCESS;
}
@ -166,10 +169,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) {
commandList->setInOrderExecution(true);
}
if (!internalUsage) {
auto &productHelper = device->getProductHelper();
commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper);
@ -185,6 +184,11 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
csr->initializeResources();
csr->initDirectSubmission();
returnValue = commandList->initialize(device, engineGroupType, desc->flags);
if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) {
commandList->enableInOrderExecution();
}
if (returnValue != ZE_RESULT_SUCCESS) {
commandList->destroy();
commandList = nullptr;
@ -234,4 +238,22 @@ void CommandListImp::unsetLastInOrderOutEvent(ze_event_handle_t outEvent) {
}
}
void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderDependencyCounterAllocation);
auto device = this->device->getNEODevice();
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
commandContainer.addToResidencyContainer(inOrderDependencyCounterAllocation);
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
inOrderExecutionEnabled = true;
}
} // namespace L0

View File

@ -32,12 +32,14 @@ struct CommandListImp : CommandList {
virtual NEO::LogicalStateHelper *getLogicalStateHelper() const { return nonImmediateLogicalStateHelper.get(); }
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
void setInOrderExecution(bool enabled) { inOrderExecutionEnabled = enabled; }
void enableInOrderExecution();
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
void unsetLastInOrderOutEvent(ze_event_handle_t outEvent);
protected:
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr;
uint32_t inOrderDependencyCounter = 0;
ze_event_handle_t latestSentInOrderEvent = nullptr;
bool latestInOrderOperationCompleted = true; // If driver is able to detect that previous operation is already done, there is no need to track dependencies.
bool inOrderExecutionEnabled = false;

View File

@ -160,6 +160,8 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::getDcFlushRequired;
using BaseClass::getHostPtrAlloc;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;

View File

@ -1233,10 +1233,10 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto commandList = CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue);
ASSERT_NE(nullptr, commandList);
auto whiteBoxCmdList = static_cast<CommandList *>(commandList.get());
whiteBoxCmdList->setInOrderExecution(true);
auto whiteBoxCmdList = static_cast<CommandList *>(commandList);
whiteBoxCmdList->enableInOrderExecution();
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
@ -1270,6 +1270,8 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_TRUE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies);
EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
commandList->destroy();
}
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {

View File

@ -665,6 +665,23 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
using EventImp<uint32_t>::latestUsedInOrderCmdList;
};
// Functor that releases an object by calling its destroy() method.
// A null pointer is ignored.
template <typename T>
struct DestroyObject {
    void operator()(T *object) {
        if (object == nullptr) {
            return;
        }
        object->destroy();
    }
};
// std::unique_ptr whose deleter is DestroyObject<T>, so the owned object is
// released through its destroy() method.
template <typename T>
using DestructableUniquePtr = std::unique_ptr<T, DestroyObject<T>>;
// Wraps a raw pointer in a DestructableUniquePtr<T>, which takes ownership of it.
template <typename T>
DestructableUniquePtr<T> createDestructableUniqePtr(T *object) {
    DestructableUniquePtr<T> owner(object);
    return owner;
}
void SetUp() override {
NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.set(1);
@ -690,8 +707,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
}
template <GFXCORE_FAMILY gfxCoreFamily>
std::unique_ptr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createImmCmdList() {
auto cmdList = std::make_unique<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>>();
DestructableUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createImmCmdList() {
auto cmdList = createDestructableUniqePtr(new WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>());
auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
@ -701,11 +718,11 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
cmdList->cmdQImmediate = mockCmdQ.get();
cmdList->isFlushTaskSubmissionEnabled = true;
cmdList->setInOrderExecution(true);
cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE;
cmdList->csr = csr;
cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
cmdList->commandContainer.setImmediateCmdListCsr(csr);
cmdList->enableInOrderExecution();
return cmdList;
}
@ -824,6 +841,26 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor
ASSERT_NE(cmdList.end(), itor);
}
// Checks the dependency-counter state of an in-order immediate command list:
// the counter allocation exists with the expected allocation type, it is present
// in the command container's residency container, the counter starts at zero,
// and every appended kernel launch increases it by one.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastSkl) {
    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    // Fatal assertion: the allocation is dereferenced on the next line, so the
    // test must stop here instead of crashing when the pointer is null.
    ASSERT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation);
    EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType());
    EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);

    // Look the container up once so begin()/end() refer to the same object.
    const auto &residencyContainer = immCmdList->getCmdContainer().getResidencyContainer();
    auto itorAlloc = std::find(residencyContainer.begin(),
                               residencyContainer.end(),
                               immCmdList->inOrderDependencyCounterAllocation);
    EXPECT_NE(itorAlloc, residencyContainer.end());

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastSkl) {
auto immCmdList = createImmCmdList<gfxCoreFamily>();