mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
feature: Experimental support of immediate cmd list in-order execution [1/n]
Related-To: LOCI-4332 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e35a7746a4
commit
14c3777409
@@ -42,6 +42,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
using BaseClass::BaseClass;
|
||||
using BaseClass::executeCommandListImmediate;
|
||||
using BaseClass::isCopyOnly;
|
||||
using BaseClass::isInOrderExecutionEnabled;
|
||||
|
||||
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
|
||||
const ze_group_count_t *threadGroupDimensions,
|
||||
@@ -168,6 +169,9 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
bool isBarrierRequired();
|
||||
|
||||
protected:
|
||||
using BaseClass::latestInOrderOperationCompleted;
|
||||
using BaseClass::latestSentInOrderEvent;
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
std::atomic<bool> dependenciesPresent{false};
|
||||
|
||||
@@ -700,6 +700,14 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
|
||||
if (hSignalEvent) {
|
||||
Event::fromHandle(hSignalEvent)->setCsr(this->csr);
|
||||
}
|
||||
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
latestInOrderOperationCompleted = false;
|
||||
this->latestSentInOrderEvent = hSignalEvent;
|
||||
if (hSignalEvent) {
|
||||
Event::fromHandle(hSignalEvent)->setLatestUsedInOrderCmdList(this);
|
||||
}
|
||||
}
|
||||
return inputRet;
|
||||
}
|
||||
|
||||
|
||||
@@ -164,6 +164,11 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
||||
commandList->internalUsage = internalUsage;
|
||||
commandList->cmdListType = CommandListType::TYPE_IMMEDIATE;
|
||||
commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS);
|
||||
|
||||
if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) {
|
||||
commandList->setInOrderExecution(true);
|
||||
}
|
||||
|
||||
if (!internalUsage) {
|
||||
auto &productHelper = device->getProductHelper();
|
||||
commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper);
|
||||
@@ -219,4 +224,11 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
|
||||
streamProperties.stateBaseAddress.setPropertyGlobalAtomics(cmdListDefaultGlobalAtomics, true);
|
||||
}
|
||||
|
||||
// Stops tracking `outEvent` as the latest in-order output event of this command list.
// Does nothing unless `outEvent` is the handle currently stored in latestSentInOrderEvent.
// On a match the stored handle is cleared and the latest in-order operation is flagged as completed.
void CommandListImp::unsetLastInOrderOutEvent(ze_event_handle_t outEvent) {
    if (outEvent != latestSentInOrderEvent) {
        // Some other event (or none) is tracked; leave the state untouched.
        return;
    }

    latestInOrderOperationCompleted = true;
    latestSentInOrderEvent = nullptr;
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -32,9 +32,15 @@ struct CommandListImp : CommandList {
|
||||
|
||||
virtual NEO::LogicalStateHelper *getLogicalStateHelper() const { return nonImmediateLogicalStateHelper.get(); }
|
||||
void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties);
|
||||
// Turns in-order execution for this command list on or off by storing the flag in inOrderExecutionEnabled.
void setInOrderExecution(bool enabled) { inOrderExecutionEnabled = enabled; }
|
||||
// Returns the current value of inOrderExecutionEnabled (false unless setInOrderExecution(true) was called).
bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; }
|
||||
// If outEvent is the currently tracked latestSentInOrderEvent, clears it and marks the latest in-order operation as completed.
void unsetLastInOrderOutEvent(ze_event_handle_t outEvent);
|
||||
|
||||
protected:
|
||||
std::unique_ptr<NEO::LogicalStateHelper> nonImmediateLogicalStateHelper;
|
||||
ze_event_handle_t latestSentInOrderEvent = nullptr;
|
||||
bool latestInOrderOperationCompleted = true; // If driver is able to detect that previous operation is already done, there is no need to track dependencies.
|
||||
bool inOrderExecutionEnabled = false;
|
||||
|
||||
~CommandListImp() override = default;
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "shared/source/utilities/wait_util.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_imp.h"
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue.h"
|
||||
#include "level_zero/core/source/context/context_imp.h"
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
@@ -334,6 +335,10 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
|
||||
}
|
||||
|
||||
// Destroys this event and always returns ZE_RESULT_SUCCESS.
// If an in-order command list was recorded through setLatestUsedInOrderCmdList, it is asked to
// drop this event's handle first, so it does not keep the handle of a deleted event.
ze_result_t Event::destroy() {
    CommandListImp *inOrderCmdList = latestUsedInOrderCmdList;
    if (inOrderCmdList != nullptr) {
        inOrderCmdList->unsetLastInOrderOutEvent(toHandle());
    }

    // No member may be accessed past this point.
    delete this;

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
@@ -39,6 +39,7 @@ struct DriverHandle;
|
||||
struct DriverHandleImp;
|
||||
struct Device;
|
||||
struct Kernel;
|
||||
struct CommandListImp;
|
||||
|
||||
#pragma pack(1)
|
||||
struct IpcEventPoolData {
|
||||
@@ -200,6 +201,10 @@ struct Event : _ze_event_handle_t {
|
||||
this->metricStreamer = metricStreamer;
|
||||
}
|
||||
|
||||
// Records the command list that should be notified (via unsetLastInOrderOutEvent)
// when this event is destroyed. Passing nullptr clears the association.
void setLatestUsedInOrderCmdList(CommandListImp *newCmdList) {
    this->latestUsedInOrderCmdList = newCmdList;
}
|
||||
|
||||
protected:
|
||||
Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {}
|
||||
|
||||
@@ -230,6 +235,7 @@ struct Event : _ze_event_handle_t {
|
||||
Device *device = nullptr;
|
||||
EventPool *eventPool = nullptr;
|
||||
Kernel *kernelWithPrintf = nullptr;
|
||||
CommandListImp *latestUsedInOrderCmdList = nullptr;
|
||||
|
||||
uint32_t maxKernelCount = 0;
|
||||
uint32_t kernelCount = 1u;
|
||||
|
||||
@@ -346,6 +346,10 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
||||
|
||||
template <typename TagSizeT>
|
||||
ze_result_t EventImp<TagSizeT>::reset() {
|
||||
if (latestUsedInOrderCmdList) {
|
||||
latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle());
|
||||
latestUsedInOrderCmdList = nullptr;
|
||||
}
|
||||
this->resetCompletionStatus();
|
||||
this->resetDeviceCompletionData(false);
|
||||
this->l3FlushAppliedOnKernel.reset();
|
||||
|
||||
@@ -163,6 +163,8 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
using BaseClass::latestInOrderOperationCompleted;
|
||||
using BaseClass::latestSentInOrderEvent;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipeControlMultiKernelEventSync;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/source/event/event_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
@@ -659,6 +660,115 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, returnValue);
|
||||
}
|
||||
|
||||
struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||
struct MockEvent : public EventImp<uint32_t> {
|
||||
using EventImp<uint32_t>::latestUsedInOrderCmdList;
|
||||
};
|
||||
|
||||
void SetUp() override {
|
||||
NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.set(1);
|
||||
|
||||
CommandListAppendLaunchKernel::SetUp();
|
||||
createKernel();
|
||||
}
|
||||
|
||||
std::unique_ptr<L0::EventPool> createEvents(uint32_t numEvents) {
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
|
||||
eventPoolDesc.count = numEvents;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
|
||||
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
|
||||
for (uint32_t i = 0; i < numEvents; i++) {
|
||||
eventDesc.index = i;
|
||||
events.emplace_back(std::unique_ptr<MockEvent>(static_cast<MockEvent *>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device))));
|
||||
}
|
||||
|
||||
return eventPool;
|
||||
}
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
ze_group_count_t groupCount = {3, 2, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
std::vector<std::unique_ptr<MockEvent>> events;
|
||||
};
|
||||
|
||||
// Verifies that an in-order immediate command list tracks the signal event of its most recent append:
// the event handle is stored when one is passed and cleared when the next append has no signal event,
// while the "latest operation completed" flag stays false after every append.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAssignment, MatchAny) {
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.ordinal = 0;
    queueDesc.index = 0;

    // The handle is dereferenced below, so start from a known value and stop the test
    // immediately if the command list could not be created.
    ze_command_list_handle_t cmdListHandle = nullptr;
    ASSERT_EQ(ZE_RESULT_SUCCESS, device->createCommandListImmediate(&queueDesc, &cmdListHandle));
    ASSERT_NE(nullptr, cmdListHandle);

    auto cmdList = static_cast<L0::CommandListCoreFamilyImmediate<gfxCoreFamily> *>(CommandList::fromHandle(cmdListHandle));
    auto immCmdList = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(cmdList);

    EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled());

    // The pool is held in a local so it lives until the end of the test body.
    auto eventPool = createEvents(1);

    // Initial state: nothing submitted yet.
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    // Append with a signal event: the event becomes the tracked one.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);

    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent);

    // Append without a signal event: the tracked event is replaced by nullptr.
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    CommandList::fromHandle(cmdListHandle)->destroy();
}
|
||||
|
||||
// Verifies that resetting or destroying the event currently tracked by an in-order immediate
// command list clears the command list's tracking state, while resetting an event that is no
// longer the tracked one leaves that state untouched.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdList, MatchAny) {
    ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.ordinal = 0;
    queueDesc.index = 0;

    // The handle is dereferenced below, so start from a known value and stop the test
    // immediately if the command list could not be created.
    ze_command_list_handle_t cmdListHandle = nullptr;
    ASSERT_EQ(ZE_RESULT_SUCCESS, device->createCommandListImmediate(&queueDesc, &cmdListHandle));
    ASSERT_NE(nullptr, cmdListHandle);

    auto cmdList = static_cast<L0::CommandListCoreFamilyImmediate<gfxCoreFamily> *>(CommandList::fromHandle(cmdListHandle));
    auto immCmdList = static_cast<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> *>(cmdList);

    // The pool is held in a local so it lives until the end of the test body.
    auto eventPool = createEvents(3);

    // Initial state: nothing submitted yet.
    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false);

    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent);

    // Resetting the tracked event clears the command list state.
    events[0]->reset();

    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false);
    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[2]->toHandle(), 0, nullptr, launchParams, false);

    // reset unused event: events[1] is no longer the tracked one, so nothing changes
    events[1]->reset();
    EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(events[2]->toHandle(), immCmdList->latestSentInOrderEvent);

    // destroy: give up ownership first so the unique_ptr never holds a deleted event
    events[2].release()->destroy();

    EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted);
    EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent);

    CommandList::fromHandle(cmdListHandle)->destroy();
}
|
||||
|
||||
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
||||
template <typename FamilyType>
|
||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::Kernel> &kernel) {
|
||||
|
||||
@@ -234,6 +234,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DisableScratchPages, -1, "-1: default, 0: do not
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OptimizeIoqBarriersHandling, -1, "-1: default, 0: disable, 1: enable. If enabled, dont dispatch stalling commands for IOQ. Instead, inherit TimestampPackets from previous enqueue.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X submission. >=0: submission count (start from 0)")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution")
|
||||
DECLARE_DEBUG_VARIABLE(int64_t, OverrideEventSynchronizeTimeout, -1, "-1: default - user provided timeout value, >0: timeout in nanoseconds")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
|
||||
@@ -518,3 +518,4 @@ AllocateHostAllocationsInHeapExtended = 1
|
||||
DirectSubmissionControllerMaxTimeout = -1
|
||||
ExitOnSubmissionNumber = -1
|
||||
ExitOnSubmissionMode = 0
|
||||
ForceInOrderImmediateCmdListExecution = -1
|
||||
Reference in New Issue
Block a user