mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-09 22:43:00 +08:00
fix: improve in-order signaling for compact event
Related-To: NEO-7966
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Committed by: Compute-Runtime-Automation
Parent: a7e86f40bf
Commit: db13d99680
@@ -76,7 +76,7 @@ void programEventL3Flush(Event *event,
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool CommandListCoreFamily<gfxCoreFamily>::isInOrderNonWalkerSignalingRequired(const Event *event) const {
|
||||
return (event && (event->isUsingContextEndOffset() || !event->isInOrderExecEvent()));
|
||||
return (event && (event->isUsingContextEndOffset() || !event->isInOrderExecEvent() || compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -177,6 +177,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
bool l3FlushEnable = false;
|
||||
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
|
||||
Event *compactEvent = nullptr;
|
||||
Event *eventForInOrderExec = event;
|
||||
if (event) {
|
||||
if (kernel->getPrintfBufferAllocation() != nullptr) {
|
||||
event->setKernelForPrintf(kernel);
|
||||
@@ -297,11 +298,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
};
|
||||
|
||||
bool inOrderExecSignalRequired = (this->isInOrderExecutionEnabled() && !launchParams.isKernelSplitOperation);
|
||||
bool inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(event);
|
||||
bool inOrderNonWalkerSignalling = isInOrderNonWalkerSignalingRequired(eventForInOrderExec);
|
||||
|
||||
if (inOrderExecSignalRequired) {
|
||||
if (inOrderNonWalkerSignalling) {
|
||||
dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false);
|
||||
dispatchEventPostSyncOperation(eventForInOrderExec, Event::STATE_CLEARED, false, false, false, false);
|
||||
} else {
|
||||
dispatchKernelArgs.eventAddress = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset;
|
||||
dispatchKernelArgs.postSyncImmValue = inOrderExecInfo->inOrderDependencyCounter + 1;
|
||||
@@ -329,7 +330,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
if (inOrderExecSignalRequired) {
|
||||
if (inOrderNonWalkerSignalling) {
|
||||
if (!launchParams.skipInOrderNonWalkerSignaling) {
|
||||
appendWaitOnSingleEvent(event, false);
|
||||
appendWaitOnSingleEvent(eventForInOrderExec, false);
|
||||
appendSignalInOrderDependencyCounter();
|
||||
}
|
||||
} else {
|
||||
|
||||
@@ -52,6 +52,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::commandListPerThreadScratchSize;
|
||||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::compactL3FlushEvent;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::containsAnyKernel;
|
||||
using BaseClass::containsCooperativeKernelsFlag;
|
||||
@@ -162,6 +163,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::cmdQImmediate;
|
||||
using BaseClass::commandContainer;
|
||||
using BaseClass::commandsToPatch;
|
||||
using BaseClass::compactL3FlushEvent;
|
||||
using BaseClass::compactL3FlushEventPacket;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::device;
|
||||
|
||||
@@ -1473,6 +1473,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
|
||||
uint32_t counterOffset = 64;
|
||||
|
||||
@@ -1482,6 +1483,9 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
|
||||
|
||||
bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope()));
|
||||
|
||||
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
|
||||
@@ -1518,9 +1522,33 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSy
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||
auto &postSync = walkerCmd->getPostSync();
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(2u, postSync.getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress());
|
||||
if (isCompactEvent) {
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||
|
||||
auto pcItor = find<PIPE_CONTROL *>(walkerItor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), pcItor);
|
||||
|
||||
auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(pcItor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), semaphoreItor);
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
ASSERT_NE(nullptr, semaphoreCmd);
|
||||
|
||||
EXPECT_EQ(static_cast<uint32_t>(Event::State::STATE_CLEARED), semaphoreCmd->getSemaphoreDataDword());
|
||||
EXPECT_EQ(eventEndGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_NOT_EQUAL_SDD, semaphoreCmd->getCompareOperation());
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(++semaphoreCmd);
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, sdiCmd->getAddress());
|
||||
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
|
||||
EXPECT_EQ(2u, sdiCmd->getDataDword0());
|
||||
} else {
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(2u, postSync.getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + counterOffset, postSync.getDestinationAddress());
|
||||
}
|
||||
}
|
||||
|
||||
auto hostAddress = static_cast<uint64_t *>(ptrOffset(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), counterOffset));
|
||||
@@ -2747,6 +2775,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
|
||||
|
||||
auto eventPool = createEvents<FamilyType>(1, false);
|
||||
|
||||
bool isCompactEvent = immCmdList->compactL3FlushEvent(immCmdList->getDcFlushRequired(events[0]->isSignalScope()));
|
||||
|
||||
auto eventHandle = events[0]->toHandle();
|
||||
|
||||
uint64_t baseGpuVa = immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
|
||||
@@ -2759,12 +2789,6 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
|
||||
auto walkerItor = find<COMPUTE_WALKER *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), walkerItor);
|
||||
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||
auto &postSync = walkerCmd->getPostSync();
|
||||
|
||||
EXPECT_EQ(std::numeric_limits<uint32_t>::max(), postSync.getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
|
||||
|
||||
auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(walkerItor, cmdList.end());
|
||||
|
||||
uint64_t expectedCounter = 1;
|
||||
@@ -2773,17 +2797,39 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
|
||||
if (immCmdList->isQwordInOrderCounter()) {
|
||||
expectedCounter = std::numeric_limits<uint32_t>::max();
|
||||
|
||||
EXPECT_EQ(cmdList.end(), semaphoreItor);
|
||||
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
|
||||
auto &postSync = walkerCmd->getPostSync();
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(expectedCounter, postSync.getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
|
||||
if (isCompactEvent) {
|
||||
EXPECT_NE(cmdList.end(), semaphoreItor);
|
||||
|
||||
auto sdiItor = find<MI_STORE_DATA_IMM *>(semaphoreItor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), sdiItor);
|
||||
|
||||
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
|
||||
ASSERT_NE(nullptr, sdiCmd);
|
||||
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), sdiCmd->getAddress());
|
||||
EXPECT_EQ(getLowPart(expectedCounter), sdiCmd->getDataDword0());
|
||||
EXPECT_EQ(getHighPart(expectedCounter), sdiCmd->getDataDword1());
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
|
||||
} else {
|
||||
EXPECT_EQ(cmdList.end(), semaphoreItor);
|
||||
|
||||
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
|
||||
EXPECT_EQ(expectedCounter, postSync.getImmediateData());
|
||||
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), postSync.getDestinationAddress());
|
||||
}
|
||||
} else {
|
||||
ASSERT_NE(cmdList.end(), semaphoreItor);
|
||||
|
||||
if (isCompactEvent) {
|
||||
// commands chaining
|
||||
semaphoreItor = find<MI_SEMAPHORE_WAIT *>(++semaphoreItor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), semaphoreItor);
|
||||
}
|
||||
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
ASSERT_NE(nullptr, semaphoreCmd);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user