feature: Experimental support of immediate cmd list in-order execution [1/n]

Related-To: LOCI-4332

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-04-26 11:48:10 +00:00
committed by Compute-Runtime-Automation
parent e35a7746a4
commit 14c3777409
11 changed files with 159 additions and 0 deletions

View File

@@ -42,6 +42,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
using BaseClass::BaseClass;
using BaseClass::executeCommandListImmediate;
using BaseClass::isCopyOnly;
using BaseClass::isInOrderExecutionEnabled;
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t *threadGroupDimensions,
@@ -168,6 +169,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
bool isBarrierRequired();
protected:
using BaseClass::latestInOrderOperationCompleted;
using BaseClass::latestSentInOrderEvent;
void printKernelsPrintfOutput(bool hangDetected);
MOCKABLE_VIRTUAL void checkAssert();
std::atomic<bool> dependenciesPresent{false};

View File

@@ -700,6 +700,14 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->setCsr(this->csr);
}
if (isInOrderExecutionEnabled()) {
latestInOrderOperationCompleted = false;
this->latestSentInOrderEvent = hSignalEvent;
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->setLatestUsedInOrderCmdList(this);
}
}
return inputRet;
}

View File

@@ -164,6 +164,11 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
commandList->internalUsage = internalUsage;
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) {
commandList->setInOrderExecution(true);
}
if (!internalUsage) {
auto &productHelper = device->getProductHelper();
commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper);
@@ -219,4 +224,11 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
streamProperties.stateBaseAddress.setPropertyGlobalAtomics(cmdListDefaultGlobalAtomics, true);
}
// Clears the in-order tracking state if `outEvent` is the event this command list
// recorded as its latest signal event; any other handle leaves the state untouched.
// After clearing, the latest in-order operation is flagged as completed.
void CommandListImp::unsetLastInOrderOutEvent(ze_event_handle_t outEvent) {
    const bool isTrackedEvent = (outEvent == latestSentInOrderEvent);
    if (!isTrackedEvent) {
        return;
    }

    latestInOrderOperationCompleted = true;
    latestSentInOrderEvent = nullptr;
}
} // namespace L0

View File

@@ -32,9 +32,15 @@ struct CommandListImp : CommandList {
virtual NEO::LogicalStateHelper *getLogicalStateHelper() const { return nonImmediateLogicalStateHelper.get(); }
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
void setInOrderExecution(bool enabled) { inOrderExecutionEnabled = enabled; }
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
// Clears latestSentInOrderEvent and marks the latest in-order operation as completed,
// but only when outEvent is the event currently tracked by this command list.
void unsetLastInOrderOutEvent(ze_event_handle_t outEvent);
protected:
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
// Signal event handle recorded by the most recent in-order submission; nullptr when that
// submission had no signal event or when the tracking was cleared by unsetLastInOrderOutEvent().
ze_event_handle_t latestSentInOrderEvent = nullptr;
bool latestInOrderOperationCompleted = true; // If driver is able to detect that previous operation is already done, there is no need to track dependencies.
// Written through setInOrderExecution(); in-order execution is off unless explicitly enabled.
bool inOrderExecutionEnabled = false;
~CommandListImp() override = default;

View File

@@ -24,6 +24,7 @@
#include "shared/source/utilities/wait_util.h"
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device.h"
@@ -334,6 +335,10 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
}
// Destroys this event. If an in-order command list is still registered on the event,
// that command list is first asked to drop this event's handle; afterwards the object
// deletes itself. Always returns ZE_RESULT_SUCCESS.
ze_result_t Event::destroy() {
    auto *const inOrderCmdList = latestUsedInOrderCmdList;
    if (inOrderCmdList != nullptr) {
        inOrderCmdList->unsetLastInOrderOutEvent(toHandle());
    }

    // No member may be touched past this point.
    delete this;
    return ZE_RESULT_SUCCESS;
}

View File

@@ -39,6 +39,7 @@ struct DriverHandle;
struct DriverHandleImp;
struct Device;
struct Kernel;
struct CommandListImp;
#pragma pack(1)
struct IpcEventPoolData {
@@ -200,6 +201,10 @@ struct Event : _ze_event_handle_t {
this->metricStreamer = metricStreamer;
}
// Records the command list that most recently used this event as its in-order signal event.
// The pointer is stored as-is and is not owned by the event.
// NOTE(review): nothing visible here clears the pointer when that command list is destroyed -
// confirm the command list cannot be released while an event still refers to it.
void setLatestUsedInOrderCmdList(CommandListImp *newCmdList) {
    this->latestUsedInOrderCmdList = newCmdList;
}
protected:
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
@@ -230,6 +235,7 @@ struct Event : _ze_event_handle_t {
Device *device = nullptr;
EventPool *eventPool = nullptr;
Kernel *kernelWithPrintf = nullptr;
// Non-owning pointer assigned via setLatestUsedInOrderCmdList(); nullptr when no in-order command list is registered.
CommandListImp *latestUsedInOrderCmdList = nullptr;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;

View File

@@ -346,6 +346,10 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
template <typename TagSizeT>
ze_result_t EventImp<TagSizeT>::reset() {
if (latestUsedInOrderCmdList) {
latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle());
latestUsedInOrderCmdList = nullptr;
}
this->resetCompletionStatus();
this->resetDeviceCompletionData(false);
this->l3FlushAppliedOnKernel.reset();

View File

@@ -163,6 +163,8 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;
using BaseClass::latestInOrderOperationCompleted;
using BaseClass::latestSentInOrderEvent;
using BaseClass::partitionCount;
using BaseClass::pipeControlMultiKernelEventSync;
using BaseClass::pipelineSelectStateTracking;

View File

@@ -22,6 +22,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/event/event_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
@@ -659,6 +660,115 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
}
struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
struct MockEvent : public EventImp<uint32_t> {
using EventImp<uint32_t>::latestUsedInOrderCmdList;
};
void SetUp() override {
NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.set(1);
CommandListAppendLaunchKernel::SetUp();
createKernel();
}
std::unique_ptr<L0::EventPool> createEvents(uint32_t numEvents) {
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = numEvents;
ze_event_desc_t eventDesc = {};
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
for (uint32_t i = 0; i < numEvents; i++) {
eventDesc.index = i;
events.emplace_back(std::unique_ptr<MockEvent>(static_cast<MockEvent *>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device))));
}
return eventPool;
}
DebugManagerStateRestore restorer;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
ze_group_count_t groupCount = {3, 2, 1};
CmdListKernelLaunchParams launchParams = {};
std::vector<std::unique_ptr<MockEvent>> events;
};
// Checks how an in-order immediate command list tracks the signal event of its latest append:
// - a fresh list reports "completed" and tracks no event,
// - an append with a signal event stores that event and clears the "completed" flag,
// - a following append without a signal event replaces the tracked event with nullptr.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAssignment, MatchAny) {
    ze_command_list_handle_t cmdListHandle = nullptr;
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.ordinal = 0;
    queueDesc.index = 0;

    // Abort early when creation fails; the handle must not be used in that case.
    ASSERT_EQ(ZE_RESULT_SUCCESS, device->createCommandListImmediate(&queueDesc, &cmdListHandle));
    ASSERT_NE(nullptr, cmdListHandle);

    auto cmdList = static_cast<L0::CommandListCoreFamilyImmediate<gfxCoreFamily> *>(CommandList::fromHandle(cmdListHandle));
    auto immCmdList = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(cmdList);

    // The fixture forced in-order mode through the debug flag.
    EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled());

    auto eventPool = createEvents(1);

    // Initial state: nothing submitted yet.
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    // Append with a signal event: the event becomes the tracked one.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent);

    // Append without a signal event: the tracked event is overwritten with nullptr.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    CommandList::fromHandle(cmdListHandle)->destroy();
}
// Checks that resetting or destroying an event updates the in-order tracking of the command
// list that last signaled it:
// - resetting the tracked event clears the tracking and marks the operation as completed,
// - resetting an event that is no longer the tracked one leaves the tracking untouched,
// - destroying the tracked event clears the tracking as well.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdList, MatchAny) {
    ze_command_list_handle_t cmdListHandle = nullptr;
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.ordinal = 0;
    queueDesc.index = 0;

    // Abort early when creation fails; the handle must not be used in that case.
    ASSERT_EQ(ZE_RESULT_SUCCESS, device->createCommandListImmediate(&queueDesc, &cmdListHandle));
    ASSERT_NE(nullptr, cmdListHandle);

    auto cmdList = static_cast<L0::CommandListCoreFamilyImmediate<gfxCoreFamily> *>(CommandList::fromHandle(cmdListHandle));
    auto immCmdList = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(cmdList);

    auto eventPool = createEvents(3);

    // Initial state: nothing submitted yet.
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);
    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent);

    // Resetting the tracked event clears the command list state.
    events[0]->reset();
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[2]->toHandle(), 0, nullptr, launchParams, false);

    // events[1] is no longer the tracked event, so resetting it must not affect the command list.
    events[1]->reset();
    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[2]->toHandle(), immCmdList->latestSentInOrderEvent);

    // destroy() deletes the event itself, so ownership is released from the unique_ptr first;
    // this way the smart pointer never holds a deleted object.
    events[2].release()->destroy();
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    CommandList::fromHandle(cmdListHandle)->destroy();
}
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
template <typename FamilyType>
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {

View File

@@ -234,6 +234,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DisableScratchPages, -1, "-1: default, 0: do not
DECLARE_DEBUG_VARIABLE(int32_t, OptimizeIoqBarriersHandling, -1, "-1: default, 0: disable, 1: enable. If enabled, dont dispatch stalling commands for IOQ. Instead, inherit TimestampPackets from previous enqueue.")
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X submission. >=0: submission count (start from 0)")
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ")
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution")
DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: default - user provided timeout value, >0: timeout in nanoseconds")
/*LOGGING FLAGS*/

View File

@@ -518,3 +518,4 @@ AllocateHostAllocationsInHeapExtended = 1
DirectSubmissionControllerMaxTimeout = -1
ExitOnSubmissionNumber = -1
ExitOnSubmissionMode = 0
ForceInOrderImmediateCmdListExecution = -1