mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: bcs split handling for in-order CmdLists
Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
43eb4ea941
commit
da8904454b
@@ -173,6 +173,8 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies, bool signalInOrderCompletion) override;
|
||||
void appendWaitOnInOrderDependency(std::shared_ptr<InOrderExecInfo> &inOrderExecInfo, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency);
|
||||
void appendSignalInOrderDependencyCounter();
|
||||
void handleInOrderDependencyCounter(Event *signalEvent);
|
||||
|
||||
ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
|
||||
ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr,
|
||||
@@ -181,7 +183,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendMultiPartitionPrologue(uint32_t partitionDataSize) override;
|
||||
void appendMultiPartitionEpilogue() override;
|
||||
void appendEventForProfilingAllWalkers(Event *event, bool beforeWalker, bool singlePacketEvent);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies);
|
||||
ze_result_t addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency);
|
||||
|
||||
ze_result_t reserveSpace(size_t size, void **ptr) override;
|
||||
ze_result_t reset() override;
|
||||
@@ -331,7 +333,6 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
|
||||
bool canSkipInOrderEventWait(const Event &event) const;
|
||||
void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
|
||||
virtual void handleInOrderDependencyCounter(Event *signalEvent);
|
||||
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
|
||||
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
|
||||
bool hasInOrderDependencies() const;
|
||||
|
||||
@@ -345,7 +345,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
|
||||
callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount;
|
||||
}
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -384,7 +384,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, waitEventHandles, relaxedOrderingDispatch, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -415,7 +415,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -453,7 +453,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -552,7 +552,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1215,7 +1215,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlitRegion(Ali
|
||||
blitProperties.srcSize = srcSize;
|
||||
blitProperties.dstSize = dstSize;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -1388,7 +1388,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
|
||||
kernelCounter += rightSize > 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
||||
bool waitForImplicitInOrderDependency = !isCopyOnly() || inOrderCopyOnlySignalingAllowed;
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, waitForImplicitInOrderDependency);
|
||||
|
||||
if (ret) {
|
||||
return ret;
|
||||
@@ -1810,7 +1812,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
return status;
|
||||
}
|
||||
|
||||
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
||||
ze_result_t res = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
|
||||
if (res) {
|
||||
return res;
|
||||
}
|
||||
@@ -2053,7 +2055,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
if (this->maxFillPaternSizeForCopyEngine < patternSize) {
|
||||
return ZE_RESULT_ERROR_INVALID_SIZE;
|
||||
} else {
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, false, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
@@ -2261,10 +2263,14 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
|
||||
handleInOrderImplicitDependencies(relaxedOrderingAllowed);
|
||||
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies, bool waitForImplicitInOrderDependency) {
|
||||
bool inOrderDependencies = false;
|
||||
if (waitForImplicitInOrderDependency) {
|
||||
handleInOrderImplicitDependencies(relaxedOrderingAllowed);
|
||||
inOrderDependencies = hasInOrderDependencies();
|
||||
}
|
||||
|
||||
if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies()) {
|
||||
if (relaxedOrderingAllowed && numWaitEvents > 0 && !inOrderDependencies) {
|
||||
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
|
||||
}
|
||||
|
||||
@@ -2567,7 +2573,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
|
||||
uint64_t *dstptr, ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true, true);
|
||||
if (ret != ZE_RESULT_SUCCESS) {
|
||||
return ret;
|
||||
}
|
||||
@@ -3084,7 +3090,7 @@ void CommandListCoreFamily<gfxCoreFamily>::programStateBaseAddress(NEO::CommandC
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true);
|
||||
ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, relaxedOrderingDispatch, true, true);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -262,7 +262,7 @@ void CommandListImp::addToMappedEventList(Event *event) {
|
||||
}
|
||||
|
||||
void CommandListImp::incRegularCmdListSubmissionCounter() {
|
||||
if (isInOrderExecutionEnabled()) {
|
||||
if (isInOrderExecutionEnabled() && inOrderExecInfo->isRegularCmdList) {
|
||||
inOrderExecInfo->regularCmdListSubmissionCounter++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -76,7 +76,7 @@ struct BcsSplit {
|
||||
|
||||
auto markerEventIndex = this->events.obtainForSplit(Context::fromHandle(cmdList->getCmdListContext()), MemoryConstants::pageSize64k / sizeof(typename CommandListCoreFamilyImmediate<gfxCoreFamily>::GfxFamily::TimestampPacketType));
|
||||
|
||||
auto barrierRequired = cmdList->isBarrierRequired();
|
||||
auto barrierRequired = !cmdList->isInOrderExecutionEnabled() && cmdList->isBarrierRequired();
|
||||
if (barrierRequired) {
|
||||
cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle());
|
||||
}
|
||||
@@ -86,17 +86,20 @@ struct BcsSplit {
|
||||
|
||||
auto &cmdQsForSplit = this->getCmdQsForSplit(direction);
|
||||
|
||||
auto signalEvent = Event::fromHandle(hSignalEvent);
|
||||
|
||||
auto totalSize = size;
|
||||
auto engineCount = cmdQsForSplit.size();
|
||||
for (size_t i = 0; i < cmdQsForSplit.size(); i++) {
|
||||
if (barrierRequired) {
|
||||
auto barrierEventHandle = this->events.barrier[markerEventIndex]->toHandle();
|
||||
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false);
|
||||
cmdList->addEventsToCmdList(1u, &barrierEventHandle, hasRelaxedOrderingDependencies, false, true);
|
||||
}
|
||||
|
||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false);
|
||||
if (hSignalEvent && i == 0u) {
|
||||
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), true, true);
|
||||
cmdList->addEventsToCmdList(numWaitEvents, phWaitEvents, hasRelaxedOrderingDependencies, false, true);
|
||||
|
||||
if (signalEvent && i == 0u) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, true, true);
|
||||
}
|
||||
|
||||
auto localSize = totalSize / engineCount;
|
||||
@@ -117,19 +120,20 @@ struct BcsSplit {
|
||||
totalSize -= localSize;
|
||||
engineCount--;
|
||||
|
||||
if (hSignalEvent) {
|
||||
Event::fromHandle(hSignalEvent)->appendAdditionalCsr(static_cast<CommandQueueImp *>(cmdQsForSplit[i])->getCsr());
|
||||
if (signalEvent) {
|
||||
signalEvent->appendAdditionalCsr(static_cast<CommandQueueImp *>(cmdQsForSplit[i])->getCsr());
|
||||
}
|
||||
}
|
||||
|
||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false);
|
||||
if (hSignalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(Event::fromHandle(hSignalEvent), false, true);
|
||||
cmdList->addEventsToCmdList(static_cast<uint32_t>(cmdQsForSplit.size()), eventHandles.data(), hasRelaxedOrderingDependencies, false, true);
|
||||
if (signalEvent) {
|
||||
cmdList->appendEventForProfilingAllWalkers(signalEvent, false, true);
|
||||
}
|
||||
cmdList->appendEventForProfilingAllWalkers(this->events.marker[markerEventIndex], false, true);
|
||||
|
||||
if (cmdList->isInOrderExecutionEnabled()) {
|
||||
cmdList->appendSignalInOrderDependencyCounter();
|
||||
cmdList->handleInOrderDependencyCounter(signalEvent);
|
||||
}
|
||||
|
||||
return result;
|
||||
|
||||
@@ -850,7 +850,7 @@ HWTEST2_F(CommandListCreate, givenImmediateCommandListAndAlreadyCompletedEventWh
|
||||
ze_event_handle_t events[] = {&event, &event2};
|
||||
event.isCompleted = Event::State::STATE_SIGNALED;
|
||||
|
||||
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, false, false);
|
||||
static_cast<CommandListCoreFamily<gfxCoreFamily> *>(commandList.get())->addEventsToCmdList(2, events, false, false, true);
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed()));
|
||||
|
||||
@@ -1450,7 +1450,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh
|
||||
|
||||
auto offset = cmdStream->getUsed();
|
||||
|
||||
immCmdList->addEventsToCmdList(0, nullptr, true, true);
|
||||
immCmdList->addEventsToCmdList(0, nullptr, true, true, true);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
@@ -3576,7 +3576,7 @@ struct BcsSplitInOrderCmdListTests : public InOrderCmdListTests {
|
||||
bool verifySplit(uint64_t expectedTaskCount) {
|
||||
auto &bcsSplit = static_cast<DeviceImp *>(device)->bcsSplit;
|
||||
|
||||
for (uint32_t i = 0; i < 4; i++) {
|
||||
for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
|
||||
if (static_cast<CommandQueueImp *>(bcsSplit.cmdQs[0])->getTaskCount() != expectedTaskCount) {
|
||||
return false;
|
||||
}
|
||||
@@ -3600,9 +3600,118 @@ struct BcsSplitInOrderCmdListTests : public InOrderCmdListTests {
|
||||
return cmdList;
|
||||
}
|
||||
|
||||
template <typename FamilyType, GFXCORE_FAMILY gfxCoreFamily>
|
||||
void verifySplitCmds(LinearStream &cmdStream, size_t streamOffset, L0::Device *device, uint64_t submissionId, WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> &immCmdList,
|
||||
uint64_t externalDependencyGpuVa);
|
||||
|
||||
std::unique_ptr<VariableBackup<HardwareInfo>> hwInfoBackup;
|
||||
const uint32_t numLinkCopyEngines = 4;
|
||||
};
|
||||
|
||||
template <typename FamilyType, GFXCORE_FAMILY gfxCoreFamily>
|
||||
void BcsSplitInOrderCmdListTests::verifySplitCmds(LinearStream &cmdStream, size_t streamOffset, L0::Device *device, uint64_t submissionId,
|
||||
WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>> &immCmdList, uint64_t externalDependencyGpuVa) {
|
||||
using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;
|
||||
|
||||
auto &bcsSplit = static_cast<DeviceImp *>(device)->bcsSplit;
|
||||
auto counterGpuAddress = immCmdList.inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream.getCpuBase(), streamOffset), (cmdStream.getUsed() - streamOffset)));
|
||||
|
||||
auto itor = cmdList.begin();
|
||||
|
||||
for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
|
||||
auto beginItor = itor;
|
||||
|
||||
auto signalSubCopyEventGpuVa = bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device);
|
||||
|
||||
size_t numExpectedSemaphores = 0;
|
||||
|
||||
if (submissionId > 0) {
|
||||
numExpectedSemaphores++;
|
||||
itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto implicitSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
|
||||
ASSERT_NE(nullptr, implicitSemaphore);
|
||||
|
||||
EXPECT_EQ(counterGpuAddress, implicitSemaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(submissionId, implicitSemaphore->getSemaphoreDataDword());
|
||||
itor++;
|
||||
}
|
||||
|
||||
if (externalDependencyGpuVa > 0) {
|
||||
numExpectedSemaphores++;
|
||||
itor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*itor);
|
||||
ASSERT_NE(nullptr, semaphoreCmd);
|
||||
|
||||
EXPECT_EQ(externalDependencyGpuVa, semaphoreCmd->getSemaphoreGraphicsAddress());
|
||||
}
|
||||
|
||||
itor = find<XY_COPY_BLT *>(itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
ASSERT_NE(nullptr, genCmdCast<XY_COPY_BLT *>(*itor));
|
||||
|
||||
auto flushDwItor = find<MI_FLUSH_DW *>(++itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), flushDwItor);
|
||||
|
||||
auto signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
|
||||
ASSERT_NE(nullptr, signalSubCopyEvent);
|
||||
|
||||
while (signalSubCopyEvent->getDestinationAddress() != signalSubCopyEventGpuVa) {
|
||||
flushDwItor = find<MI_FLUSH_DW *>(++flushDwItor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), flushDwItor);
|
||||
|
||||
signalSubCopyEvent = genCmdCast<MI_FLUSH_DW *>(*flushDwItor);
|
||||
ASSERT_NE(nullptr, signalSubCopyEvent);
|
||||
}
|
||||
|
||||
itor = ++flushDwItor;
|
||||
|
||||
auto semaphoreCmds = findAll<MI_SEMAPHORE_WAIT *>(beginItor, itor);
|
||||
EXPECT_EQ(numExpectedSemaphores, semaphoreCmds.size());
|
||||
}
|
||||
|
||||
auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(itor, cmdList.end());
|
||||
|
||||
if (submissionId > 0) {
|
||||
auto implicitSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
ASSERT_NE(nullptr, implicitSemaphore);
|
||||
|
||||
EXPECT_EQ(counterGpuAddress, implicitSemaphore->getSemaphoreGraphicsAddress());
|
||||
EXPECT_EQ(submissionId, implicitSemaphore->getSemaphoreDataDword());
|
||||
|
||||
++semaphoreItor;
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < numLinkCopyEngines; i++) {
|
||||
auto subCopyEventSemaphore = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
|
||||
ASSERT_NE(nullptr, subCopyEventSemaphore);
|
||||
|
||||
EXPECT_EQ(bcsSplit.events.subcopy[i + (submissionId * numLinkCopyEngines)]->getCompletionFieldGpuAddress(device), subCopyEventSemaphore->getSemaphoreGraphicsAddress());
|
||||
|
||||
itor = ++semaphoreItor;
|
||||
}
|
||||
|
||||
ASSERT_NE(nullptr, genCmdCast<MI_FLUSH_DW *>(*itor)); // marker event
|
||||
|
||||
auto implicitCounterSdi = genCmdCast<MI_STORE_DATA_IMM *>(*(++itor));
|
||||
ASSERT_NE(nullptr, implicitCounterSdi);
|
||||
|
||||
EXPECT_EQ(counterGpuAddress, implicitCounterSdi->getAddress());
|
||||
EXPECT_EQ(submissionId + 1, implicitCounterSdi->getDataDword0());
|
||||
|
||||
EXPECT_EQ(submissionId + 1, immCmdList.inOrderExecInfo->inOrderDependencyCounter);
|
||||
|
||||
auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(++itor, cmdList.end());
|
||||
EXPECT_EQ(0u, sdiCmds.size());
|
||||
}
|
||||
|
||||
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyThenHandleInOrderSignaling, IsAtLeastXeHpcCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
@@ -3641,6 +3750,69 @@ HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyTh
|
||||
EXPECT_EQ(0u, sdiCmd->getDataDword1());
|
||||
}
|
||||
|
||||
// First split copy on a fresh in-order immediate command list: the recorded
// commands are verified as submission 0 with no external wait event.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyAfterBarrierWithoutImplicitDependenciesThenHandleCorrectInOrderSignaling, IsAtLeastXeHpcCore) {
    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    uint32_t copyData = 0;
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;

    // Reset the barrier tag and request the next barrier count.
    // NOTE(review): presumably this makes the command list report a pending barrier — confirm.
    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();

    size_t offset = cmdStream->getUsed();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // no implicit dependencies
    verifySplitCmds<FamilyType, gfxCoreFamily>(*cmdStream, offset, device, 0, *immCmdList, 0);
}
|
||||
|
||||
// Second split copy on the same in-order immediate command list: only the
// commands recorded by the second append are verified, as submission 1 with no
// external wait event.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyAfterBarrierWithImplicitDependenciesThenHandleCorrectInOrderSignaling, IsAtLeastXeHpcCore) {
    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    uint32_t copyData = 0;
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;

    // Reset the barrier tag and request the next barrier count before the first copy.
    // NOTE(review): presumably this makes the command list report a pending barrier — confirm.
    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // Everything recorded after this offset belongs to the second copy.
    size_t offset = cmdStream->getUsed();

    *immCmdList->csr->getBarrierCountTagAddress() = 0u;
    immCmdList->csr->getNextBarrierCount();
    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // implicit dependencies
    verifySplitCmds<FamilyType, gfxCoreFamily>(*cmdStream, offset, device, 1, *immCmdList, 0);
}
|
||||
|
||||
// Second split copy waits on an explicit event: the commands recorded by the
// second append are verified as submission 1, with the event's completion field
// address passed as the expected external dependency.
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenAppendingMemoryCopyWithEventDependencyThenRequiredSemaphores, IsAtLeastXeHpcCore) {
    auto immCmdList = createBcsSplitImmCmdList<gfxCoreFamily>();

    auto cmdStream = immCmdList->getCmdContainer().getCommandStream();

    uint32_t copyData = 0;
    constexpr size_t copySize = 8 * MemoryConstants::megaByte;

    auto eventPool = createEvents<FamilyType>(1, false);
    // Clear the in-order flag on the wait event used below.
    events[0]->inOrderExecEvent = false;
    auto eventHandle = events[0]->toHandle();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 0, nullptr, false, false);

    // Everything recorded after this offset belongs to the second copy.
    size_t offset = cmdStream->getUsed();

    immCmdList->appendMemoryCopy(&copyData, &copyData, copySize, nullptr, 1, &eventHandle, false, false);

    verifySplitCmds<FamilyType, gfxCoreFamily>(*cmdStream, offset, device, 1, *immCmdList, events[0]->getCompletionFieldGpuAddress(device));
}
|
||||
|
||||
HWTEST2_F(BcsSplitInOrderCmdListTests, givenBcsSplitEnabledWhenDispatchingCopyRegionThenHandleInOrderSignaling, IsAtLeastXeHpcCore) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
|
||||
@@ -88,7 +88,7 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
|
||||
ultCsr->directSubmission.reset(directSubmission);
|
||||
|
||||
ze_event_handle_t hEventHandle = event->toHandle();
|
||||
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true);
|
||||
auto result = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(immCommandList.get())->addEventsToCmdList(1, &hEventHandle, true, true, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = immCommandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
|
||||
Reference in New Issue
Block a user