feature: initial support of in-order regular cmd lists
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
8e989fa333
commit
d5d43ead7c
|
@ -327,6 +327,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
||||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||||
bool isInOrderEventWaitRequired(const Event &event) const;
|
bool isInOrderEventWaitRequired(const Event &event) const;
|
||||||
|
// Tells the in-order paths whether the counter allocation is used for this command list.
// The base implementation opts out: addEventsToCmdList then programs a compute barrier
// (for non-copy-only lists) instead of waiting on the counter allocation.
virtual bool useCounterAllocationForInOrderMode() const {
    return false;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <PRODUCT_FAMILY gfxProductFamily>
|
template <PRODUCT_FAMILY gfxProductFamily>
|
||||||
|
|
|
@ -135,6 +135,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||||
cmdListCurrentStartOffset = 0;
|
cmdListCurrentStartOffset = 0;
|
||||||
|
|
||||||
mappedTsEventList.clear();
|
mappedTsEventList.clear();
|
||||||
|
|
||||||
|
inOrderDependencyCounter = 0;
|
||||||
|
inOrderAllocationOffset = 0;
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1387,7 +1391,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||||
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
|
||||||
addToMappedEventList(signalEvent);
|
addToMappedEventList(signalEvent);
|
||||||
|
|
||||||
if (this->inOrderExecutionEnabled && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
|
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
|
||||||
if (!signalEvent && !isCopyOnly()) {
|
if (!signalEvent && !isCopyOnly()) {
|
||||||
NEO::PipeControlArgs args;
|
NEO::PipeControlArgs args;
|
||||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
|
||||||
|
@ -2130,7 +2134,11 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||||
}
|
}
|
||||||
|
|
||||||
if (hasInOrderDependencies) {
|
if (hasInOrderDependencies) {
|
||||||
|
if (useCounterAllocationForInOrderMode()) {
|
||||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed);
|
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed);
|
||||||
|
} else if (!isCopyOnly()) {
|
||||||
|
appendComputeBarrierCommand();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (numWaitEvents > 0) {
|
if (numWaitEvents > 0) {
|
||||||
|
@ -2141,6 +2149,10 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) {
|
||||||
|
inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls.
|
||||||
|
}
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2263,6 +2275,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event->isInOrderExecEvent()) {
|
if (event->isInOrderExecEvent()) {
|
||||||
|
UNRECOVERABLE_IF(this->cmdListType != TYPE_IMMEDIATE);
|
||||||
if (isInOrderEventWaitRequired(*event)) {
|
if (isInOrderEventWaitRequired(*event)) {
|
||||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), event->getInOrderAllocationOffset(), relaxedOrderingAllowed);
|
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), event->getInOrderAllocationOffset(), relaxedOrderingAllowed);
|
||||||
}
|
}
|
||||||
|
|
|
@ -188,6 +188,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||||
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
|
||||||
void handleInOrderDependencyCounter();
|
void handleInOrderDependencyCounter();
|
||||||
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
|
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
|
||||||
|
// Immediate command lists opt in to the counter allocation for in-order mode.
bool useCounterAllocationForInOrderMode() const override {
    return true;
}
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL void checkAssert();
|
MOCKABLE_VIRTUAL void checkAssert();
|
||||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||||
|
|
|
@ -284,7 +284,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||||
this->dcFlushSupport // dcFlushEnable
|
this->dcFlushSupport // dcFlushEnable
|
||||||
};
|
};
|
||||||
|
|
||||||
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation);
|
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode());
|
||||||
|
|
||||||
if (inOrderExecSignalRequired) {
|
if (inOrderExecSignalRequired) {
|
||||||
if (isTimestampEvent) {
|
if (isTimestampEvent) {
|
||||||
|
|
|
@ -76,6 +76,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||||
using BaseClass::indirectAllocationsAllowed;
|
using BaseClass::indirectAllocationsAllowed;
|
||||||
using BaseClass::initialize;
|
using BaseClass::initialize;
|
||||||
using BaseClass::inOrderAllocationOffset;
|
using BaseClass::inOrderAllocationOffset;
|
||||||
|
using BaseClass::inOrderDependencyCounter;
|
||||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||||
using BaseClass::isRelaxedOrderingDispatchAllowed;
|
using BaseClass::isRelaxedOrderingDispatchAllowed;
|
||||||
using BaseClass::isSyncModeQueue;
|
using BaseClass::isSyncModeQueue;
|
||||||
|
|
|
@ -726,6 +726,26 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel {
|
||||||
return cmdList;
|
return cmdList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamily<gfxCoreFamily>>> createRegularCmdList(bool copyOnly) {
|
||||||
|
auto cmdList = makeZeUniquePtr<WhiteBox<L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
|
|
||||||
|
auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
|
||||||
|
|
||||||
|
ze_command_queue_desc_t desc = {};
|
||||||
|
|
||||||
|
mockCmdQs.emplace_back(std::make_unique<Mock<CommandQueue>>(device, csr, &desc));
|
||||||
|
|
||||||
|
auto engineType = copyOnly ? EngineGroupType::Copy : EngineGroupType::RenderCompute;
|
||||||
|
|
||||||
|
cmdList->initialize(device, engineType, 0u);
|
||||||
|
cmdList->enableInOrderExecution();
|
||||||
|
|
||||||
|
createdCmdLists++;
|
||||||
|
|
||||||
|
return cmdList;
|
||||||
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createCopyOnlyImmCmdList() {
|
DestroyableZeUniquePtr<WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>> createCopyOnlyImmCmdList() {
|
||||||
auto cmdList = createImmCmdList<gfxCoreFamily>();
|
auto cmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
@ -2670,6 +2690,121 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRe
|
||||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using InOrderRegularCmdListTests = InOrderCmdListTests;
|
||||||
|
|
||||||
|
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenProgramPipeControlsToHandleDependencies, IsAtLeastXeHpCore) {
|
||||||
|
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||||
|
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||||
|
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
|
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
|
||||||
|
|
||||||
|
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
|
||||||
|
|
||||||
|
size_t offset = cmdStream->getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
|
||||||
|
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
|
||||||
|
|
||||||
|
{
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||||
|
(cmdStream->getUsed() - offset)));
|
||||||
|
EXPECT_EQ(nullptr, genCmdCast<PIPE_CONTROL *>(*cmdList.begin()));
|
||||||
|
|
||||||
|
auto walkerItor = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_NE(cmdList.end(), walkerItor);
|
||||||
|
|
||||||
|
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||||
|
auto &postSync = walkerCmd->getPostSync();
|
||||||
|
|
||||||
|
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||||
|
|
||||||
|
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = cmdStream->getUsed();
|
||||||
|
|
||||||
|
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
|
||||||
|
|
||||||
|
{
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||||
|
(cmdStream->getUsed() - offset)));
|
||||||
|
EXPECT_NE(nullptr, genCmdCast<PIPE_CONTROL *>(*cmdList.begin()));
|
||||||
|
|
||||||
|
auto walkerItor = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_NE(cmdList.end(), walkerItor);
|
||||||
|
|
||||||
|
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||||
|
auto &postSync = walkerCmd->getPostSync();
|
||||||
|
|
||||||
|
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||||
|
|
||||||
|
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||||
|
}
|
||||||
|
|
||||||
|
regularCmdList->inOrderAllocationOffset = 123;
|
||||||
|
|
||||||
|
regularCmdList->reset();
|
||||||
|
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
|
||||||
|
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
using InOrderRegularCopyOnlyCmdListTests = InOrderCmdListTests;
|
||||||
|
|
||||||
|
HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontProgramBarriers, IsAtLeastXeHpCore) {
|
||||||
|
using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
|
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(true);
|
||||||
|
|
||||||
|
auto cmdStream = regularCmdList->getCmdContainer().getCommandStream();
|
||||||
|
|
||||||
|
size_t offset = cmdStream->getUsed();
|
||||||
|
|
||||||
|
auto alignedPtr = alignedMalloc(MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize);
|
||||||
|
|
||||||
|
regularCmdList->appendMemoryCopy(alignedPtr, alignedPtr, MemoryConstants::cacheLineSize, nullptr, 0, nullptr, false, false);
|
||||||
|
|
||||||
|
{
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||||
|
(cmdStream->getUsed() - offset)));
|
||||||
|
|
||||||
|
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||||
|
}
|
||||||
|
|
||||||
|
offset = cmdStream->getUsed();
|
||||||
|
|
||||||
|
regularCmdList->appendMemoryCopy(alignedPtr, alignedPtr, MemoryConstants::cacheLineSize, nullptr, 0, nullptr, false, false);
|
||||||
|
|
||||||
|
{
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||||
|
(cmdStream->getUsed() - offset)));
|
||||||
|
|
||||||
|
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(cmdList.end(), sdiItor);
|
||||||
|
|
||||||
|
auto copyCmd = genCmdCast<XY_COPY_BLT *>(*cmdList.begin());
|
||||||
|
|
||||||
|
EXPECT_NE(nullptr, copyCmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
alignedFree(alignedPtr);
|
||||||
|
}
|
||||||
|
|
||||||
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {
|
||||||
template <typename FamilyType>
|
template <typename FamilyType>
|
||||||
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::KernelImp> &kernel) {
|
uint64_t getIndirectHeapOffsetForImplicitArgsBuffer(const Mock<::L0::KernelImp> &kernel) {
|
||||||
|
|
Loading…
Reference in New Issue