mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
feature: signal sync alloc in in-order CmdList appendSignalEvent path
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ef18464353
commit
5f559ec2d5
@@ -322,6 +322,7 @@ struct CommandListCoreFamily : CommandListImp {
|
|||||||
void postInitComputeSetup();
|
void postInitComputeSetup();
|
||||||
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
NEO::PreemptionMode obtainKernelPreemptionMode(Kernel *kernel);
|
||||||
void obtainNewTimestampPacketNode();
|
void obtainNewTimestampPacketNode();
|
||||||
|
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
template <PRODUCT_FAMILY gfxProductFamily>
|
template <PRODUCT_FAMILY gfxProductFamily>
|
||||||
|
|||||||
@@ -2088,6 +2088,10 @@ inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint
|
|||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
|
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_handle_t hEvent) {
|
||||||
|
if (this->inOrderExecutionEnabled) {
|
||||||
|
addEventsToCmdList(0, nullptr, isRelaxedOrderingDispatchAllowed(0), false);
|
||||||
|
}
|
||||||
|
|
||||||
auto event = Event::fromHandle(hEvent);
|
auto event = Event::fromHandle(hEvent);
|
||||||
event->resetKernelCountAndPacketUsedCount();
|
event->resetKernelCountAndPacketUsedCount();
|
||||||
|
|
||||||
@@ -2107,6 +2111,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
|
|||||||
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
|
||||||
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
|
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
|
||||||
|
|
||||||
|
if (this->inOrderExecutionEnabled) {
|
||||||
|
obtainNewTimestampPacketNode();
|
||||||
|
|
||||||
|
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &hEvent, false, false, false);
|
||||||
|
|
||||||
|
appendSignalInOrderDependencyTimestampPacket();
|
||||||
|
}
|
||||||
|
|
||||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
|
||||||
*commandContainer.getCommandStream(),
|
*commandContainer.getCommandStream(),
|
||||||
|
|||||||
@@ -174,7 +174,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||||||
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
|
TransferType getTransferType(NEO::SvmAllocationData *dstAlloc, NEO::SvmAllocationData *srcAlloc);
|
||||||
size_t getTransferThreshold(TransferType transferType);
|
size_t getTransferThreshold(TransferType transferType);
|
||||||
bool isBarrierRequired();
|
bool isBarrierRequired();
|
||||||
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const;
|
bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
using BaseClass::deferredTimestampPackets;
|
using BaseClass::deferredTimestampPackets;
|
||||||
|
|||||||
@@ -307,18 +307,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (inOrderExecSignalRequired && event) {
|
if (inOrderExecSignalRequired && event) {
|
||||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
auto eventHandle = event->toHandle();
|
||||||
auto gpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(1, &eventHandle, false, false, false);
|
||||||
|
|
||||||
uint32_t packetsToWait = this->signalAllEventPackets ? event->getMaxPacketsCount() : event->getPacketsInUse();
|
|
||||||
|
|
||||||
for (uint32_t i = 0u; i < packetsToWait; i++) {
|
|
||||||
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
|
|
||||||
gpuAddr,
|
|
||||||
Event::State::STATE_CLEARED,
|
|
||||||
MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD);
|
|
||||||
gpuAddr += event->getSinglePacketSize();
|
|
||||||
}
|
|
||||||
|
|
||||||
appendSignalInOrderDependencyTimestampPacket();
|
appendSignalInOrderDependencyTimestampPacket();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
|||||||
using BaseClass::indirectAllocationsAllowed;
|
using BaseClass::indirectAllocationsAllowed;
|
||||||
using BaseClass::initialize;
|
using BaseClass::initialize;
|
||||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||||
|
using BaseClass::isRelaxedOrderingDispatchAllowed;
|
||||||
using BaseClass::isSyncModeQueue;
|
using BaseClass::isSyncModeQueue;
|
||||||
using BaseClass::isTbxMode;
|
using BaseClass::isTbxMode;
|
||||||
using BaseClass::isTimestampEventForMultiTile;
|
using BaseClass::isTimestampEventForMultiTile;
|
||||||
|
|||||||
@@ -465,6 +465,13 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
|
|||||||
EXPECT_EQ(event->csrs[0], cmdList.csr);
|
EXPECT_EQ(event->csrs[0], cmdList.csr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verifies that a CommandListCoreFamily instance (not the Immediate subclass) reports
// relaxed-ordering dispatch as not allowed.
HWTEST2_F(CommandListTest, givenRegularCmdListWhenAskingForRelaxedOrderingThenReturnFalse, IsAtLeastSkl) {
|
||||||
|
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
|
commandList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u);
|
||||||
|
|
||||||
|
// The base-class implementation added in this commit returns false regardless of the
// wait-event count, so the argument 5 is just a sample value.
EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(5));
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListTest,
|
HWTEST2_F(CommandListTest,
|
||||||
givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInExternalHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet, IsAtLeastSkl) {
|
givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInExternalHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet, IsAtLeastSkl) {
|
||||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||||
|
|||||||
@@ -1143,6 +1143,75 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingTimestampEventThen
|
|||||||
EXPECT_EQ(0u, sdiCmd->getDataDword0());
|
EXPECT_EQ(0u, sdiCmd->getDataDword0());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the command sequence emitted by appendSignalEvent on a command list obtained from
// createImmCmdList (presumably with in-order execution enabled by the fixture - confirm):
//   - the first parsed command is an MI_SEMAPHORE_WAIT on the previous timestamp node,
//   - the last MI_STORE_DATA_IMM targets the latest timestamp node,
//   - the command right before that store is an MI_SEMAPHORE_WAIT on the event's completion field.
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventThenSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||||
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||||
|
|
||||||
|
auto immCmdList = createImmCmdList<gfxCoreFamily>();
|
||||||
|
|
||||||
|
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||||
|
|
||||||
|
auto eventPool = createEvents<FamilyType>(1, true);
|
||||||
|
|
||||||
|
// Append a kernel first so that the later appendSignalEvent has a preceding operation to wait on.
immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||||
|
|
||||||
|
// Remember the stream position so that only commands added by appendSignalEvent are parsed below.
auto offset = cmdStream->getUsed();
|
||||||
|
|
||||||
|
immCmdList->appendSignalEvent(events[0]->toHandle());
|
||||||
|
|
||||||
|
// After the call one node is expected in the current container and one in the deferred container.
EXPECT_EQ(1u, immCmdList->timestampPacketContainer->peekNodes().size());
|
||||||
|
EXPECT_EQ(1u, immCmdList->deferredTimestampPackets->peekNodes().size());
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
ptrOffset(cmdStream->getCpuBase(), offset),
|
||||||
|
(cmdStream->getUsed() - offset)));
|
||||||
|
|
||||||
|
// Part 1: the first command after the offset must wait on the previous (deferred) node.
{
|
||||||
|
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*cmdList.begin());
|
||||||
|
|
||||||
|
ASSERT_NE(nullptr, semaphoreCmd);
|
||||||
|
|
||||||
|
auto previousNode = immCmdList->deferredTimestampPackets->peekNodes()[0];
|
||||||
|
uint64_t nodeGpuVa = previousNode->getGpuAddress() + previousNode->getContextEndOffset();
|
||||||
|
|
||||||
|
// The wait compares the node's context-end location against initValue using "not equal".
EXPECT_EQ(TimestampPacketConstants::initValue, semaphoreCmd->getSemaphoreDataDword());
|
||||||
|
EXPECT_EQ(nodeGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||||
|
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Part 2: scan the parsed list from the end to locate the last MI_STORE_DATA_IMM.
{
|
||||||
|
|
||||||
|
auto rbeginItor = cmdList.rbegin();
|
||||||
|
|
||||||
|
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rbeginItor);
|
||||||
|
// NOTE(review): the iterator is advanced and dereferenced before it is compared with rend(),
// so if no MI_STORE_DATA_IMM is present the loop dereferences rend(). Consider checking
// for rend() before the dereference.
while (sdiCmd == nullptr) {
|
||||||
|
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++rbeginItor));
|
||||||
|
if (rbeginItor == cmdList.rend()) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_NE(nullptr, sdiCmd);
|
||||||
|
|
||||||
|
auto node = getLatestTsNode(immCmdList.get());
|
||||||
|
uint64_t nodeGpuVa = node->getGpuAddress() + node->getContextEndOffset();
|
||||||
|
|
||||||
|
// The store must write a zero dword (not a qword) to the latest node's context-end location.
EXPECT_EQ(nodeGpuVa, sdiCmd->getAddress());
|
||||||
|
EXPECT_EQ(0u, sdiCmd->getStoreQword());
|
||||||
|
EXPECT_EQ(0u, sdiCmd->getDataDword0());
|
||||||
|
|
||||||
|
// Advancing the reverse iterator moves to the command that precedes the store in stream order.
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(++rbeginItor));
|
||||||
|
ASSERT_NE(nullptr, semaphoreCmd);
|
||||||
|
|
||||||
|
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
|
||||||
|
|
||||||
|
// That command must wait until the event's completion field is no longer STATE_CLEARED.
EXPECT_EQ(static_cast<uint32_t>(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword());
|
||||||
|
EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||||
|
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
|
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
|
||||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||||
|
|||||||
Reference in New Issue
Block a user