mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 23:03:02 +08:00
feature: signal sync alloc in in-order CmdList appendSignalEvent path
Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ef18464353
commit
5f559ec2d5
@@ -322,6 +322,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void postInitComputeSetup();
|
||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||
void obtainNewTimestampPacketNode();
|
||||
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProductFamily>
|
||||
|
||||
@@ -2088,6 +2088,10 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
addEventsToCmdList(0, nullptr, isRelaxedOrderingDispatchAllowed(0), false);
|
||||
}
|
||||
|
||||
auto event = Event::fromHandle(hEvent);
|
||||
event->resetKernelCountAndPacketUsedCount();
|
||||
|
||||
@@ -2107,6 +2111,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
|
||||
|
||||
if (this->inOrderExecutionEnabled) {
|
||||
obtainNewTimestampPacketNode();
|
||||
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &hEvent, false, false, false);
|
||||
|
||||
appendSignalInOrderDependencyTimestampPacket();
|
||||
}
|
||||
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||
*commandContainer.getCommandStream(),
|
||||
|
||||
@@ -174,7 +174,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
|
||||
size_t getTransferThreshold(TransferType transferType);
|
||||
bool isBarrierRequired();
|
||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
|
||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const override;
|
||||
|
||||
protected:
|
||||
using BaseClass::deferredTimestampPackets;
|
||||
|
||||
@@ -307,18 +307,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
}
|
||||
|
||||
if (inOrderExecSignalRequired && event) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
auto gpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
||||
|
||||
uint32_t packetsToWait = this->signalAllEventPackets ? event->getMaxPacketsCount() : event->getPacketsInUse();
|
||||
|
||||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
||||
gpuAddr,
|
||||
Event::State::STATE_CLEARED,
|
||||
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
||||
gpuAddr += event->getSinglePacketSize();
|
||||
}
|
||||
auto eventHandle = event->toHandle();
|
||||
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &eventHandle, false, false, false);
|
||||
|
||||
appendSignalInOrderDependencyTimestampPacket();
|
||||
}
|
||||
|
||||
@@ -75,6 +75,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::indirectAllocationsAllowed;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::isRelaxedOrderingDispatchAllowed;
|
||||
using BaseClass::isSyncModeQueue;
|
||||
using BaseClass::isTbxMode;
|
||||
using BaseClass::isTimestampEventForMultiTile;
|
||||
|
||||
@@ -465,6 +465,13 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
|
||||
EXPECT_EQ(event->csrs[0], cmdList.csr);
|
||||
}
|
||||
|
||||
// A CommandListCoreFamily object (as opposed to the immediate variant, which overrides the query)
// is expected to report that relaxed ordering dispatch is not allowed.
HWTEST2_F(CommandListTest, givenRegularCmdListWhenAskingForRelaxedOrderingThenReturnFalse, IsAtLeastSkl) {
    using RegularCmdList = WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>;
    constexpr uint32_t numWaitEvents = 5;

    auto regularCmdList = std::make_unique<RegularCmdList>();
    regularCmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);

    EXPECT_FALSE(regularCmdList->isRelaxedOrderingDispatchAllowed(numWaitEvents));
}
|
||||
|
||||
HWTEST2_F(CommandListTest,
|
||||
givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInExternalHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet, IsAtLeastSkl) {
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
|
||||
@@ -1143,6 +1143,75 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword0());
|
||||
}
|
||||
|
||||
// Checks the command sequence emitted by appendSignalEvent() on an in-order immediate command list:
//  - the first command after the kernel launch is a semaphore wait on the previous timestamp node,
//  - the last MI_STORE_DATA_IMM writes 0 to the latest timestamp node,
//  - the command right before that store is a semaphore wait on the event's completion field.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventThenSignalSyncAllocation, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

    auto immCmdList = createImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    auto eventPool = createEvents<FamilyType>(1, true);

    immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    // Only commands programmed by appendSignalEvent() are parsed below.
    auto offset = cmdStream->getUsed();

    immCmdList->appendSignalEvent(events[0]->toHandle());

    EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
    EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
                                                      ptrOffset(cmdStream->getCpuBase(), offset),
                                                      (cmdStream->getUsed() - offset)));

    // Guard every dereference of begin()/rbegin() below against an empty command list.
    ASSERT_TRUE(cmdList.begin() != cmdList.end());

    {
        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdList.begin());

        ASSERT_NE(nullptr, semaphoreCmd);

        auto previousNode = immCmdList->deferredTimestampPackets->peekNodes()[0];
        uint64_t nodeGpuVa = previousNode->getGpuAddress() + previousNode->getContextEndOffset();

        EXPECT_EQ(TimestampPacketConstants::initValue, semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    }

    {
        auto rbeginItor = cmdList.rbegin();

        // Walk backwards to the last MI_STORE_DATA_IMM. The iterator is compared against rend()
        // before it is dereferenced, so a missing command fails the assertion below instead of
        // reading past the end of the list.
        auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rbeginItor);
        while (sdiCmd == nullptr && ++rbeginItor != cmdList.rend()) {
            sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rbeginItor);
        }

        ASSERT_NE(nullptr, sdiCmd);

        auto node = getLatestTsNode(immCmdList.get());
        uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();

        EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
        EXPECT_EQ(0u, sdiCmd->getStoreQword());
        EXPECT_EQ(0u, sdiCmd->getDataDword0());

        // The command preceding the store must exist before it can be inspected.
        ++rbeginItor;
        ASSERT_TRUE(rbeginItor != cmdList.rend());

        auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*rbeginItor);
        ASSERT_NE(nullptr, semaphoreCmd);

        auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);

        EXPECT_EQ(static_cast<uint32_t>(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword());
        EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
    }
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
|
||||
Reference in New Issue
Block a user