feature: bcs split handling for in-order CmdLists

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-10-04 13:59:49 +00:00
committed by Compute-Runtime-Automation
parent 43eb4ea941
commit da8904454b
7 changed files with 214 additions and 31 deletions

View File

@@ -173,6 +173,8 @@ struct CommandListCoreFamily : CommandListImp {
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
void appendWaitOnInOrderDependency(std::shared_ptr<InOrderExecInfo> &inOrderExecInfo, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency);
void appendSignalInOrderDependencyCounter();
void handleInOrderDependencyCounter(Event *signalEvent);
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
@@ -181,7 +183,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
void appendMultiPartitionEpilogue() override;
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies);
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
ze_result_t reserveSpace(size_t size, void **ptr) override;
ze_result_t reset() override;
@@ -331,7 +333,6 @@ struct CommandListCoreFamily : CommandListImp {
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
bool canSkipInOrderEventWait(const Event &event) const;
void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
virtual void handleInOrderDependencyCounter(Event *signalEvent);
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
bool hasInOrderDependencies() const;

View File

@@ -345,7 +345,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
if (ret) {
return ret;
}
@@ -384,7 +384,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true, true);
if (ret) {
return ret;
}
@@ -415,7 +415,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
if (ret) {
return ret;
}
@@ -453,7 +453,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
if (ret) {
return ret;
}
@@ -552,7 +552,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true, true);
if (ret) {
return ret;
}
@@ -1215,7 +1215,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
blitProperties.srcSize = srcSize;
blitProperties.dstSize = dstSize;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
if (ret) {
return ret;
}
@@ -1388,7 +1388,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
kernelCounter += rightSize > 0 ? 1 : 0;
}
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
bool waitForImplicitInOrderDependency = !isCopyOnly() || inOrderCopyOnlySignalingAllowed;
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency);
if (ret) {
return ret;
@@ -1810,7 +1812,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
return status;
}
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
if (res) {
return res;
}
@@ -2053,7 +2055,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
if (this->maxFillPaternSizeForCopyEngine < patternSize) {
return ZE_RESULT_ERROR_INVALID_SIZE;
} else {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
if (ret) {
return ret;
}
@@ -2261,10 +2263,14 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
handleInOrderImplicitDependencies(relaxedOrderingAllowed);
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency) {
bool inOrderDependencies = false;
if (waitForImplicitInOrderDependency) {
handleInOrderImplicitDependencies(relaxedOrderingAllowed);
inOrderDependencies = hasInOrderDependencies();
}
if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies()) {
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependencies) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
@@ -2567,7 +2573,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
uint64_t *dstptr, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true, true);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
@@ -3084,7 +3090,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
if (ret) {
return ret;
}

View File

@@ -262,7 +262,7 @@ void CommandListImp::addToMappedEventList(Event *event) {
}
void CommandListImp::incRegularCmdListSubmissionCounter() {
if (isInOrderExecutionEnabled()) {
if (isInOrderExecutionEnabled() && inOrderExecInfo->isRegularCmdList) {
inOrderExecInfo->regularCmdListSubmissionCounter++;
}
}

View File

@@ -76,7 +76,7 @@ struct BcsSplit {
auto markerEventIndex = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
auto barrierRequired = cmdList->isBarrierRequired();
auto barrierRequired = !cmdList->isInOrderExecutionEnabled() && cmdList->isBarrierRequired();
if (barrierRequired) {
cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle());
}
@@ -86,17 +86,20 @@ struct BcsSplit {
auto &cmdQsForSplit = this->getCmdQsForSplit(direction);
auto signalEvent = Event::fromHandle(hSignalEvent);
auto totalSize = size;
auto engineCount = cmdQsForSplit.size();
for (size_t i = 0; i < cmdQsForSplit.size(); i++) {
if (barrierRequired) {
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false);
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false, true);
}
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false);
if (hSignalEvent && i == 0u) {
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), true, true);
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
if (signalEvent && i == 0u) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, true, true);
}
auto localSize = totalSize / engineCount;
@@ -117,19 +120,20 @@ struct BcsSplit {
totalSize -= localSize;
engineCount--;
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->appendAdditionalCsr(static_cast<CommandQueueImp *>(cmdQsForSplit[i])->getCsr());
if (signalEvent) {
signalEvent->appendAdditionalCsr(static_cast<CommandQueueImp *>(cmdQsForSplit[i])->getCsr());
}
}
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false);
if (hSignalEvent) {
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), false, true);
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
if (signalEvent) {
cmdList->appendEventForProfilingAllWalkers(signalEvent, false, true);
}
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true);
if (cmdList->isInOrderExecutionEnabled()) {
cmdList->appendSignalInOrderDependencyCounter();
cmdList->handleInOrderDependencyCounter(signalEvent);
}
return result;

View File

@@ -850,7 +850,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListAndAlreadyCompletedEventWh
ze_event_handle_t events[] = {&event, &event2};
event.isCompleted = Event::State::STATE_SIGNALED;
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, false, false);
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, false, false, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));

View File

@@ -1450,7 +1450,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh
auto offset = cmdStream->getUsed();
immCmdList->addEventsToCmdList(0, nullptr, true, true);
immCmdList->addEventsToCmdList(0, nullptr, true, true, true);
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
@@ -3576,7 +3576,7 @@ struct BcsSplitInOrderCmdListTests : public InOrderCmdListTests {
bool verifySplit(uint64_t expectedTaskCount) {
auto &bcsSplit = static_cast<DeviceImp *>(device)->bcsSplit;
for (uint32_t i = 0; i < 4; i++) {
for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
if (static_cast<CommandQueueImp *>(bcsSplit.cmdQs[0])->getTaskCount() != expectedTaskCount) {
return false;
}
@@ -3600,9 +3600,118 @@ struct BcsSplitInOrderCmdListTests : public InOrderCmdListTests {
return cmdList;
}
template <typename FamilyType, GFXCORE_FAMILY gfxCoreFamily>
void verifySplitCmds(LinearStream &cmdStream, size_t streamOffset, L0::Device *device, uint64_t submissionId, WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> &immCmdList,
uint64_t externalDependencyGpuVa);
std::unique_ptr<VariableBackup<HardwareInfo>> hwInfoBackup;
const uint32_t numLinkCopyEngines = 4;
};
// Parses the commands written to cmdStream starting at streamOffset and checks the
// sequence produced by one BCS-split copy on an in-order immediate command list.
//
// cmdStream               - stream that received the split copy commands
// streamOffset            - offset in cmdStream where the verified submission starts
// device                  - device owning the bcsSplit state and used to resolve event GPU addresses
// submissionId            - zero-based index of the verified copy; when > 0 a semaphore on the
//                           in-order counter with this value is expected before each sub-copy
// immCmdList              - command list whose inOrderExecInfo counter is checked
// externalDependencyGpuVa - when non-zero, a semaphore on this address is expected before each sub-copy
//
// Every iterator is compared against cmdList.end() before it is dereferenced, so a stream that
// is missing commands fails the test instead of reading past the end of the parsed list.
template <typename FamilyType, GFXCORE_FAMILY gfxCoreFamily>
void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_t streamOffset, L0::Device *device, uint64_t submissionId,
                                                  WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> &immCmdList, uint64_t externalDependencyGpuVa) {
    using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    auto &bcsSplit = static_cast<DeviceImp *>(device)->bcsSplit;
    auto counterGpuAddress = immCmdList.inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream.getCpuBase(), streamOffset), (cmdStream.getUsed() - streamOffset)));

    auto itor = cmdList.begin();

    // Part 1: one sub-copy per link copy engine.
    for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
        auto beginItor = itor;
        auto signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device);

        size_t numExpectedSemaphores = 0;

        if (submissionId > 0) {
            // Implicit in-order dependency on the previous submission.
            numExpectedSemaphores++;

            itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
            ASSERT_NE(cmdList.end(), itor);

            auto implicitSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
            ASSERT_NE(nullptr, implicitSemaphore);
            EXPECT_EQ(counterGpuAddress, implicitSemaphore->getSemaphoreGraphicsAddress());
            EXPECT_EQ(submissionId, implicitSemaphore->getSemaphoreDataDword());

            itor++;
        }

        if (externalDependencyGpuVa > 0) {
            // Explicit wait event passed by the caller.
            numExpectedSemaphores++;

            itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
            ASSERT_NE(cmdList.end(), itor);

            auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
            ASSERT_NE(nullptr, semaphoreCmd);
            EXPECT_EQ(externalDependencyGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
        }

        itor = find<XY_COPY_BLT *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);
        ASSERT_NE(nullptr, genCmdCast<XY_COPY_BLT *>(*itor));

        // Skip MI_FLUSH_DW commands until the one that writes this sub-copy's event.
        auto flushDwItor = find<MI_FLUSH_DW *>(++itor, cmdList.end());
        ASSERT_NE(cmdList.end(), flushDwItor);

        auto signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
        ASSERT_NE(nullptr, signalSubCopyEvent);

        while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) {
            flushDwItor = find<MI_FLUSH_DW *>(++flushDwItor, cmdList.end());
            ASSERT_NE(cmdList.end(), flushDwItor);

            signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
            ASSERT_NE(nullptr, signalSubCopyEvent);
        }

        itor = ++flushDwItor;

        // No semaphores beyond the expected ones may appear within this sub-copy.
        auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(beginItor, itor);
        EXPECT_EQ(numExpectedSemaphores, semaphoreCmds.size());
    }

    // Part 2: waits on all sub-copy events, marker event, in-order counter update.
    auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), semaphoreItor);

    if (submissionId > 0) {
        auto implicitSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, implicitSemaphore);
        EXPECT_EQ(counterGpuAddress, implicitSemaphore->getSemaphoreGraphicsAddress());
        EXPECT_EQ(submissionId, implicitSemaphore->getSemaphoreDataDword());

        ++semaphoreItor;
    }

    for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
        ASSERT_NE(cmdList.end(), semaphoreItor);

        auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
        ASSERT_NE(nullptr, subCopyEventSemaphore);
        EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress());

        itor = ++semaphoreItor;
    }

    ASSERT_NE(cmdList.end(), itor);
    ASSERT_NE(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // marker event

    ++itor;
    ASSERT_NE(cmdList.end(), itor);

    auto implicitCounterSdi = genCmdCast<MI_STORE_DATA_IMM *>(*itor);
    ASSERT_NE(nullptr, implicitCounterSdi);
    EXPECT_EQ(counterGpuAddress, implicitCounterSdi->getAddress());
    EXPECT_EQ(submissionId + 1, implicitCounterSdi->getDataDword0());

    EXPECT_EQ(submissionId + 1, immCmdList.inOrderExecInfo->inOrderDependencyCounter);

    // The counter must be written exactly once: no further MI_STORE_DATA_IMM may follow.
    auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(++itor, cmdList.end());
    EXPECT_EQ(0u, sdiCmds.size());
}
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyThenHandleInOrderSignaling, IsAtLeastXeHpcCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
@@ -3641,6 +3750,69 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
// First split copy on a fresh in-order immediate command list: verified as submission 0,
// i.e. without any wait on the in-order counter and without external dependencies.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyAfterBarrierWithoutImplicitDependenciesThenHandleCorrectInOrderSignaling, IsAtLeastXeHpcCore) {
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;
    uint32_t copyData = 0;

    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();
    auto &stream = *immCmdList->getCmdContainer().getCommandStream();

    // Zero the barrier count tag and take the next barrier count
    // (presumably leaves the CSR reporting a pending barrier - confirm against isBarrierRequired()).
    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();

    const size_t startOffset = stream.getUsed();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // no implicit dependencies
    verifySplitCmds<FamilyType, gfxCoreFamily>(stream, startOffset, device, 0, *immCmdList, 0);
}
// Second split copy on the same in-order immediate command list: verified as submission 1,
// so each sub-copy is expected to wait on the in-order counter written by the first copy.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyAfterBarrierWithImplicitDependenciesThenHandleCorrectInOrderSignaling, IsAtLeastXeHpcCore) {
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;
    uint32_t copyData = 0;

    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();
    auto &stream = *immCmdList->getCmdContainer().getCommandStream();

    // Zero the barrier count tag and take the next barrier count before the first copy.
    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // Only the commands of the second copy are verified.
    const size_t startOffset = stream.getUsed();

    // Same barrier setup again before the second copy.
    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // implicit dependencies
    verifySplitCmds<FamilyType, gfxCoreFamily>(stream, startOffset, device, 1, *immCmdList, 0);
}
// Second split copy with one explicit wait event: verified as submission 1 with the
// event's completion address passed as the expected external dependency.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyWithEventDependencyThenRequiredSemaphores, IsAtLeastXeHpcCore) {
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;

    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();
    auto &stream = *immCmdList->getCmdContainer().getCommandStream();

    uint32_t copyData = 0;

    // The pool is kept in a local for the duration of the test; the event is marked as a non in-order event.
    auto eventPool = createEvents<FamilyType>(1, false);
    events[0]->inOrderExecEvent = false;
    auto waitEventHandle = events[0]->toHandle();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // Only the commands of the second copy are verified.
    const size_t startOffset = stream.getUsed();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 1, &waitEventHandle, false, false);

    const auto externalDependencyGpuVa = events[0]->getCompletionFieldGpuAddress(device);
    verifySplitCmds<FamilyType, gfxCoreFamily>(stream, startOffset, device, 1, *immCmdList, externalDependencyGpuVa);
}
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRegionThenHandleInOrderSignaling, IsAtLeastXeHpcCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;

View File

@@ -88,7 +88,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
ultCsr->directSubmission.reset(directSubmission);
ze_event_handle_t hEventHandle = event->toHandle();
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true);
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = immCommandList->getCmdContainer().getCommandStream()->getUsed();