feature: signal sync alloc in in-order CmdList appendSignalEvent path

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-06-02 12:17:18 +00:00
committed by Compute-Runtime-Automation
parent ef18464353
commit 5f559ec2d5
7 changed files with 93 additions and 13 deletions

View File

@@ -322,6 +322,7 @@ struct CommandListCoreFamily : CommandListImp {
void postInitComputeSetup();
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
void obtainNewTimestampPacketNode();
// Default answer for this command list type: relaxed ordering dispatch is reported as
// not allowed, whatever numWaitEvents is. Declared virtual so a derived command list
// can supply its own decision.
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -2088,6 +2088,10 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
if (this->inOrderExecutionEnabled) {
addEventsToCmdList(0, nullptr, isRelaxedOrderingDispatchAllowed(0), false);
}
auto event = Event::fromHandle(hEvent);
event->resetKernelCountAndPacketUsedCount();
@@ -2107,6 +2111,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
if (this->inOrderExecutionEnabled) {
obtainNewTimestampPacketNode();
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &hEvent, false, false, false);
appendSignalInOrderDependencyTimestampPacket();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
*commandContainer.getCommandStream(),

View File

@@ -174,7 +174,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
size_t getTransferThreshold(TransferType transferType);
bool isBarrierRequired();
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const override;
protected:
using BaseClass::deferredTimestampPackets;

View File

@@ -307,18 +307,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
if (inOrderExecSignalRequired && event) {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
auto gpuAddr = event->getCompletionFieldGpuAddress(this->device);
uint32_t packetsToWait = this->signalAllEventPackets ? event->getMaxPacketsCount() : event->getPacketsInUse();
for (uint32_t i = 0u; i < packetsToWait; i++) {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddr,
Event::State::STATE_CLEARED,
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
gpuAddr += event->getSinglePacketSize();
}
auto eventHandle = event->toHandle();
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &eventHandle, false, false, false);
appendSignalInOrderDependencyTimestampPacket();
}

View File

@@ -75,6 +75,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::indirectAllocationsAllowed;
using BaseClass::initialize;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isRelaxedOrderingDispatchAllowed;
using BaseClass::isSyncModeQueue;
using BaseClass::isTbxMode;
using BaseClass::isTimestampEventForMultiTile;

View File

@@ -465,6 +465,13 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
EXPECT_EQ(event->csrs[0], cmdList.csr);
}
// Verifies that a CommandListCoreFamily instance (initialized on the RenderCompute engine
// group) reports relaxed ordering dispatch as not allowed for a non-zero wait event count.
HWTEST2_F(CommandListTest, givenRegularCmdListWhenAskingForRelaxedOrderingThenReturnFalse, IsAtLeastSkl) {
    using RegularCmdList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;

    auto regularCmdList = std::make_unique<RegularCmdList>();
    regularCmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    const bool relaxedOrderingAllowed = regularCmdList->isRelaxedOrderingDispatchAllowed(5);

    EXPECT_FALSE(relaxedOrderingAllowed);
}
HWTEST2_F(CommandListTest,
givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInExternalHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet, IsAtLeastSkl) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();

View File

@@ -1143,6 +1143,75 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
EXPECT_EQ(0u, sdiCmd->getDataDword0());
}
// Checks the command sequence emitted by appendSignalEvent on an in-order immediate
// command list that already holds one kernel launch:
//  - the first parsed command is an MI_SEMAPHORE_WAIT on the previous (deferred)
//    timestamp packet node,
//  - the last MI_STORE_DATA_IMM in the stream targets the latest timestamp node,
//  - the command directly preceding that store is an MI_SEMAPHORE_WAIT on the
//    event's completion field.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    auto eventPool = createEvents<FamilyType>(1, true);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    // Only the commands programmed by appendSignalEvent are parsed below.
    auto offset = cmdStream->getUsed();

    immCmdList->appendSignalEvent(events[0]->toHandle());

    EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
    EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
                                                      ptrOffset(cmdStream->getCpuBase(), offset),
                                                      (cmdStream->getUsed() - offset)));

    // Guard the begin()/rbegin() dereferences below against an empty parse result.
    ASSERT_FALSE(cmdList.empty());

    {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdList.begin());

        ASSERT_NE(nullptr, semaphoreCmd);

        auto previousNode = immCmdList->deferredTimestampPackets->peekNodes()[0];
        uint64_t nodeGpuVa = previousNode->getGpuAddress() + previousNode->getContextEndOffset();

        EXPECT_EQ(TimestampPacketConstants::initValue, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    }

    {
        // Walk backwards to the last MI_STORE_DATA_IMM. The iterator is compared with
        // rend() before every dereference, so a stream without any SDI fails the
        // assertion below instead of reading past the end of the list.
        MI_STORE_DATA_IMM *sdiCmd = nullptr;
        auto rbeginItor = cmdList.rbegin();
        for (; rbeginItor != cmdList.rend(); ++rbeginItor) {
            sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rbeginItor);
            if (sdiCmd != nullptr) {
                break;
            }
        }

        ASSERT_NE(nullptr, sdiCmd);

        auto node = getLatestTsNode(immCmdList.get());
        uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();

        EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
        EXPECT_EQ(0u, sdiCmd->getStoreQword());
        EXPECT_EQ(0u, sdiCmd->getDataDword0());

        // The command just before the store must exist before it can be inspected.
        ++rbeginItor;
        ASSERT_TRUE(rbeginItor != cmdList.rend());

        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*rbeginItor);
        ASSERT_NE(nullptr, semaphoreCmd);

        auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);

        EXPECT_EQ(static_cast<uint32_t>(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    }
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;