feature: enable waiting for in-order events from regular CmdList

This is preparatory work. The feature is currently functional only on immediate CmdLists.

Related-To: NEO-8145

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-09-29 13:47:43 +00:00
committed by Compute-Runtime-Automation
parent dc32e2b7da
commit 891cc2d09b
13 changed files with 165 additions and 79 deletions

View File

@@ -334,8 +334,9 @@ struct CommandListCoreFamily : CommandListImp {
virtual void handleInOrderDependencyCounter(Event *signalEvent);
bool isQwordInOrderCounter() const { return GfxFamily::isQwordInOrderCounter; }
bool isInOrderNonWalkerSignalingRequired(const Event *event) const;
bool hasInOrderDependencies() const;
void addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandTypes::CmdType cmdType);
void addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType);
InOrderPatchCommandsContainer<GfxFamily> inOrderPatchCmds;
};

View File

@@ -139,10 +139,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
mappedTsEventList.clear();
inOrderDependencyCounter = 0;
inOrderAllocationOffset = 0;
if (inOrderExecInfo) {
inOrderExecInfo->inOrderDependencyCounter = 0;
auto &inOrderDependencyCounterAllocation = inOrderExecInfo->inOrderDependencyCounterAllocation;
memset(inOrderDependencyCounterAllocation.getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
}
@@ -152,10 +153,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event *signalEvent) {
if (!isQwordInOrderCounter() && ((inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max())) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true);
if (!isQwordInOrderCounter() && ((inOrderExecInfo->inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max())) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderExecInfo->inOrderDependencyCounter + 1, inOrderAllocationOffset, false, true);
inOrderDependencyCounter = 0;
inOrderExecInfo->inOrderDependencyCounter = 0;
// multitile immediate writes are uint64_t aligned
uint32_t offset = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
@@ -167,12 +168,12 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter(Event
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(); // write 1 on new offset
}
inOrderDependencyCounter++;
inOrderExecInfo->inOrderDependencyCounter++;
this->commandContainer.addToResidencyContainer(&inOrderExecInfo->inOrderDependencyCounterAllocation);
if (signalEvent && signalEvent->isInOrderExecEvent()) {
signalEvent->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
signalEvent->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset);
}
}
@@ -2224,24 +2225,20 @@ inline uint32_t CommandListCoreFamily<gfxCoreFamily>::getRegionOffsetForAppendMe
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(bool relaxedOrderingAllowed) {
auto hasInOrderDependencies = (inOrderDependencyCounter > 0);
if (hasInOrderDependencies) {
if (hasInOrderDependencies()) {
if (relaxedOrderingAllowed) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true);
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(&inOrderExecInfo->inOrderDependencyCounterAllocation, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed, true);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
inline ze_result_t CommandListCoreFamily<gfxCoreFamily>::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) {
auto hasInOrderDependencies = (inOrderDependencyCounter > 0);
handleInOrderImplicitDependencies(relaxedOrderingAllowed);
if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies) {
if (relaxedOrderingAllowed && numWaitEvents > 0 && !hasInOrderDependencies()) {
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
@@ -2320,7 +2317,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
false, true, isQwordInOrderCounter(), false);
if (implicitDependency) {
addCmdForPatching(semaphoreCommand, waitValue, InOrderPatchCommandTypes::CmdType::Semaphore);
addCmdForPatching(semaphoreCommand, waitValue, InOrderPatchCommandHelpers::PatchCmdType::Semaphore);
}
}
@@ -2426,7 +2423,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
uint64_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t signalValue = inOrderExecInfo->inOrderDependencyCounter + 1;
uint64_t gpuVa = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset;
@@ -2435,7 +2432,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(miStoreCmd, gpuVa, getLowPart(signalValue), getHighPart(signalValue),
isQwordInOrderCounter(), (this->partitionCount > 1));
addCmdForPatching(miStoreCmd, signalValue, InOrderPatchCommandTypes::CmdType::Sdi);
addCmdForPatching(miStoreCmd, signalValue, InOrderPatchCommandHelpers::PatchCmdType::Sdi);
if (NEO::EncodeUserInterruptHelper::isOperationAllowed(NEO::EncodeUserInterruptHelper::onSignalingFenceMask)) {
NEO::EnodeUserInterrupt<GfxFamily>::encode(*commandContainer.getCommandStream());
@@ -3458,20 +3455,25 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandTypes::CmdType cmdType) {
void CommandListCoreFamily<gfxCoreFamily>::addCmdForPatching(void *cmd, uint64_t counterValue, InOrderPatchCommandHelpers::PatchCmdType patchCmdType) {
if ((NEO::DebugManager.flags.EnableInOrderRegularCmdListPatching.get() != 0) && (this->cmdListType == TYPE_REGULAR)) {
this->inOrderPatchCmds.emplace_back(cmd, counterValue, cmdType);
this->inOrderPatchCmds.emplace_back(cmd, counterValue, patchCmdType);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::patchInOrderCmds() {
if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 0) {
auto appendCounter = inOrderExecInfo->regularCmdListSubmissionCounter * inOrderDependencyCounter;
if (isInOrderExecutionEnabled() && inOrderExecInfo->regularCmdListSubmissionCounter > 1) {
auto appendCounter = InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo);
for (auto &cmd : inOrderPatchCmds) {
cmd.patch(appendCounter);
}
}
}
// Reports whether this command list has in-order work that later commands must wait on.
// True only when in-order exec info exists and its dependency counter is non-zero.
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamily<gfxCoreFamily>::hasInOrderDependencies() const {
    if (!inOrderExecInfo) {
        // No in-order exec info attached to this command list: nothing to wait for.
        return false;
    }

    return inOrderExecInfo->inOrderDependencyCounter > 0;
}
} // namespace L0

View File

@@ -182,7 +182,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
bool skipInOrderNonWalkerSignalingAllowed(ze_event_handle_t signalEvent) const override;
protected:
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderExecInfo;
void printKernelsPrintfOutput(bool hangDetected);

View File

@@ -411,7 +411,7 @@ bool CommandListCoreFamilyImmediate<gfxCoreFamily>::waitForEventsFromHost() {
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const {
return (!relaxedOrderingDispatch && (numWaitEvents > 0 || this->inOrderDependencyCounter > 0));
return (!relaxedOrderingDispatch && (numWaitEvents > 0 || this->hasInOrderDependencies()));
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -517,7 +517,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(ze_even
if (isInOrderExecutionEnabled()) {
if (isSkippingInOrderBarrierAllowed(hSignalEvent, numWaitEvents, phWaitEvents)) {
if (hSignalEvent) {
Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, this->inOrderDependencyCounter, this->inOrderAllocationOffset);
Event::fromHandle(hSignalEvent)->updateInOrderExecState(inOrderExecInfo, inOrderExecInfo->inOrderDependencyCounter, this->inOrderAllocationOffset);
}
return ZE_RESULT_SUCCESS;
@@ -1253,7 +1253,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAssert() {
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const {
auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 1 : 0);
auto numEvents = numWaitEvents + (this->hasInOrderDependencies() ? 1 : 0);
return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents);
}
@@ -1265,7 +1265,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::synchronizeInOrderExe
ze_result_t status = ZE_RESULT_NOT_READY;
auto waitValue = this->inOrderDependencyCounter;
auto waitValue = inOrderExecInfo->inOrderDependencyCounter;
lastHangCheckTime = std::chrono::high_resolution_clock::now();
waitStartTime = lastHangCheckTime;

View File

@@ -304,7 +304,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
dispatchEventPostSyncOperation(event, Event::STATE_CLEARED, false, false, false, false);
} else {
dispatchKernelArgs.eventAddress = inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress() + this->inOrderAllocationOffset;
dispatchKernelArgs.postSyncImmValue = this->inOrderDependencyCounter + 1;
dispatchKernelArgs.postSyncImmValue = inOrderExecInfo->inOrderDependencyCounter + 1;
}
}
@@ -334,7 +334,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
} else {
UNRECOVERABLE_IF(!dispatchKernelArgs.outWalkerPtr);
addCmdForPatching(dispatchKernelArgs.outWalkerPtr, dispatchKernelArgs.postSyncImmValue, InOrderPatchCommandTypes::CmdType::Walker);
addCmdForPatching(dispatchKernelArgs.outWalkerPtr, dispatchKernelArgs.postSyncImmValue, InOrderPatchCommandHelpers::PatchCmdType::Walker);
}
}

View File

@@ -40,7 +40,6 @@ struct CommandListImp : CommandList {
protected:
std::shared_ptr<InOrderExecInfo> inOrderExecInfo;
uint64_t inOrderDependencyCounter = 0;
uint32_t inOrderAllocationOffset = 0;
~CommandListImp() override = default;

View File

@@ -537,8 +537,8 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
auto commandList = static_cast<CommandListImp *>(CommandList::fromHandle(phCommandLists[i]));
commandList->setCsr(this->csr);
commandList->storeReferenceTsToMappedEvents(false);
commandList->patchInOrderCmds();
commandList->incRegularCmdListSubmissionCounter();
commandList->patchInOrderCmds();
auto &commandContainer = commandList->getCmdContainer();

View File

@@ -404,6 +404,10 @@ void Event::updateInOrderExecState(std::shared_ptr<InOrderExecInfo> &newInOrderE
inOrderAllocationOffset = allocationOffset;
}
// Returns the counter value a waiter has to observe for this in-order event.
// The value is the signal value stored on the event plus the append counter
// computed by InOrderPatchCommandHelpers::getAppendCounterValue() from the
// exec info the event is bound to.
uint64_t Event::getInOrderExecSignalValue() const {
    // inOrderExecInfo is handed to the event through updateInOrderExecState() and may
    // still be empty when this accessor is called; do not dereference it in that case
    // and fall back to the plain signal value.
    const uint64_t appendCounter = inOrderExecInfo.get() ? InOrderPatchCommandHelpers::getAppendCounterValue(*inOrderExecInfo) : 0;

    return inOrderExecSignalValue + appendCounter;
}
// Stores newCmdQ as the event's latestUsedCmdQueue.
void Event::setLatestUsedCmdQueue(CommandQueue *newCmdQ) {
this->latestUsedCmdQueue = newCmdQ;
}

View File

@@ -220,7 +220,7 @@ struct Event : _ze_event_handle_t {
bool isInOrderExecEvent() const { return inOrderExecEvent; }
void enableInOrderMode() { this->inOrderExecEvent = true; }
NEO::GraphicsAllocation *getInOrderExecDataAllocation() const;
uint64_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; }
uint64_t getInOrderExecSignalValue() const;
uint32_t getInOrderAllocationOffset() const { return inOrderAllocationOffset; }
void setLatestUsedCmdQueue(CommandQueue *newCmdQ);
NEO::TimeStampData *peekReferenceTs() {

View File

@@ -157,10 +157,11 @@ ze_result_t EventImp<TagSizeT>::queryInOrderEventStatus() {
}
auto hostAddress = static_cast<uint64_t *>(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset));
auto waitValue = getInOrderExecSignalValue();
bool signaled = true;
for (uint32_t i = 0; i < this->getPacketsInUse(); i++) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, this->inOrderExecSignalValue, std::greater_equal<uint64_t>())) {
if (!NEO::WaitUtils::waitFunctionWithPredicate<const uint64_t>(hostAddress, waitValue, std::greater_equal<uint64_t>())) {
signaled = false;
break;
}
@@ -391,7 +392,7 @@ ze_result_t EventImp<TagSizeT>::waitForUserFence(uint64_t timeout) {
uint64_t waitAddress = castToUint64(ptrOffset(inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBuffer(), this->inOrderAllocationOffset));
if (!csrs[0]->waitUserFence(this->inOrderExecSignalValue, waitAddress, timeout)) {
if (!csrs[0]->waitUserFence(getInOrderExecSignalValue(), waitAddress, timeout)) {
return ZE_RESULT_NOT_READY;
}

View File

@@ -29,12 +29,21 @@ struct InOrderExecInfo : public NEO::NonCopyableClass {
NEO::GraphicsAllocation &inOrderDependencyCounterAllocation;
NEO::MemoryManager &memoryManager;
uint64_t inOrderDependencyCounter = 0;
uint64_t regularCmdListSubmissionCounter = 0;
bool isRegularCmdList = false;
};
namespace InOrderPatchCommandTypes {
enum class CmdType {
namespace InOrderPatchCommandHelpers {
// Computes the value that has to be added to a recorded counter value to account for
// repeated submissions of a regular command list:
//   inOrderDependencyCounter * (regularCmdListSubmissionCounter - 1)
// Returns 0 when the exec info is not flagged as a regular command list or when the
// submission counter is 0 or 1.
inline uint64_t getAppendCounterValue(const InOrderExecInfo &inOrderExecInfo) {
    const bool submittedMoreThanOnce = inOrderExecInfo.regularCmdListSubmissionCounter > 1;

    if (!inOrderExecInfo.isRegularCmdList || !submittedMoreThanOnce) {
        return 0;
    }

    const uint64_t previousSubmissions = inOrderExecInfo.regularCmdListSubmissionCounter - 1;
    return inOrderExecInfo.inOrderDependencyCounter * previousSubmissions;
}
enum class PatchCmdType {
None,
Sdi,
Semaphore,
@@ -42,18 +51,18 @@ enum class CmdType {
};
template <typename GfxFamily>
struct BaseCmd {
BaseCmd(void *cmd, uint64_t baseCounterValue, CmdType cmdType) : cmd(cmd), baseCounterValue(baseCounterValue), cmdType(cmdType) {}
struct PatchCmd {
PatchCmd(void *cmd, uint64_t baseCounterValue, PatchCmdType patchCmdType) : cmd(cmd), baseCounterValue(baseCounterValue), patchCmdType(patchCmdType) {}
void patch(uint64_t appendCunterValue) {
switch (cmdType) {
case CmdType::Sdi:
switch (patchCmdType) {
case PatchCmdType::Sdi:
patchSdi(appendCunterValue);
break;
case CmdType::Semaphore:
case PatchCmdType::Semaphore:
patchSemaphore(appendCunterValue);
break;
case CmdType::Walker:
case PatchCmdType::Walker:
patchComputeWalker(appendCunterValue);
break;
default:
@@ -64,7 +73,7 @@ struct BaseCmd {
void *cmd = nullptr;
const uint64_t baseCounterValue = 0;
const CmdType cmdType = CmdType::None;
const PatchCmdType patchCmdType = PatchCmdType::None;
protected:
void patchSdi(uint64_t appendCunterValue) {
@@ -88,12 +97,12 @@ struct BaseCmd {
}
}
BaseCmd() = delete;
PatchCmd() = delete;
};
} // namespace InOrderPatchCommandTypes
} // namespace InOrderPatchCommandHelpers
template <typename GfxFamily>
using InOrderPatchCommandsContainer = std::vector<InOrderPatchCommandTypes::BaseCmd<GfxFamily>>;
using InOrderPatchCommandsContainer = std::vector<InOrderPatchCommandHelpers::PatchCmd<GfxFamily>>;
} // namespace L0

View File

@@ -77,7 +77,6 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::indirectAllocationsAllowed;
using BaseClass::initialize;
using BaseClass::inOrderAllocationOffset;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderExecInfo;
using BaseClass::inOrderPatchCmds;
using BaseClass::isFlushTaskSubmissionEnabled;
@@ -170,7 +169,6 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::getHostPtrAlloc;
using BaseClass::hostSynchronize;
using BaseClass::immediateCmdListHeapSharing;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderExecInfo;
using BaseClass::inOrderPatchCmds;
using BaseClass::isBcsSplitNeeded;

View File

@@ -959,7 +959,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEven
EXPECT_EQ(MemoryConstants::pageSize64k, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getUnderlyingBufferSize());
EXPECT_TRUE(events[0]->inOrderExecEvent);
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(&events[0]->inOrderExecInfo->inOrderDependencyCounterAllocation, &immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation);
EXPECT_EQ(events[0]->inOrderAllocationOffset, 0u);
@@ -1369,17 +1369,17 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependen
EXPECT_NE(nullptr, immCmdList->inOrderExecInfo.get());
EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getAllocationType());
EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver);
ultCsr->storeMakeResidentAllocations = true;
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(1u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(2u, ultCsr->makeResidentAllocations[&immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation]);
}
@@ -1573,7 +1573,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->signalScope = 0;
immCmdList->inOrderDependencyCounter = 1;
immCmdList->inOrderExecInfo->inOrderDependencyCounter = 1;
EXPECT_TRUE(immCmdList->isRelaxedOrderingDispatchAllowed(0));
@@ -1582,7 +1582,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr);
ASSERT_EQ(2u, immCmdList->flushData.size());
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
{
@@ -1681,7 +1681,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenChainingWithRelaxedOrderingT
auto eventPool = createEvents<FamilyType>(1, true);
events[0]->signalScope = 0;
immCmdList->inOrderDependencyCounter = 1;
immCmdList->inOrderExecInfo->inOrderDependencyCounter = 1;
EXPECT_TRUE(immCmdList->isRelaxedOrderingDispatchAllowed(0));
@@ -1690,7 +1690,7 @@ HWTEST2_F(InOrderCmdListTests, givenDebugFlagSetWhenChainingWithRelaxedOrderingT
zeCommandListAppendLaunchKernel(immCmdList->toHandle(), kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr);
ASSERT_EQ(1u, immCmdList->flushCount);
EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingRegularEventThenClearAndChainWithSyncAllocSignaling, IsAtLeastXeHpCore) {
@@ -1786,11 +1786,11 @@ HWTEST2_F(InOrderCmdListTests, givenHostVisibleEventOnLatestFlushWhenCallingSync
using NonPostSyncWalkerMatcher = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
HWTEST2_F(InOrderCmdListTests, givenNonPostSyncWalkerWhenPatchingThenThrow, NonPostSyncWalkerMatcher) {
InOrderPatchCommandTypes::BaseCmd<FamilyType> incorrectCmd(nullptr, 1, InOrderPatchCommandTypes::CmdType::None);
InOrderPatchCommandHelpers::PatchCmd<FamilyType> incorrectCmd(nullptr, 1, InOrderPatchCommandHelpers::PatchCmdType::None);
EXPECT_ANY_THROW(incorrectCmd.patch(1));
InOrderPatchCommandTypes::BaseCmd<FamilyType> walkerCmd(nullptr, 1, InOrderPatchCommandTypes::CmdType::Walker);
InOrderPatchCommandHelpers::PatchCmd<FamilyType> walkerCmd(nullptr, 1, InOrderPatchCommandHelpers::PatchCmdType::Walker);
EXPECT_ANY_THROW(walkerCmd.patch(1));
}
@@ -1815,7 +1815,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP
auto immCmdList = createImmCmdList<gfxCoreFamily>();
immCmdList->inOrderAllocationOffset = 64;
immCmdList->inOrderDependencyCounter = 123;
immCmdList->inOrderExecInfo->inOrderDependencyCounter = 123;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -1844,7 +1844,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenProgramP
EXPECT_EQ(expectedAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(immCmdList->inOrderDependencyCounter, sdiCmd->getDataDword0());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounter, sdiCmd->getDataDword0());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenProgramPcAndSignalAlloc, NonPostSyncWalkerMatcher) {
@@ -1854,7 +1854,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro
auto immCmdList = createImmCmdList<gfxCoreFamily>();
immCmdList->inOrderAllocationOffset = 64;
immCmdList->inOrderDependencyCounter = 123;
immCmdList->inOrderExecInfo->inOrderDependencyCounter = 123;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -1892,7 +1892,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenPro
EXPECT_EQ(expectedAddress, sdiCmd->getAddress());
EXPECT_EQ(immCmdList->isQwordInOrderCounter(), sdiCmd->getStoreQword());
EXPECT_EQ(immCmdList->inOrderDependencyCounter, sdiCmd->getDataDword0());
EXPECT_EQ(immCmdList->inOrderExecInfo->inOrderDependencyCounter, sdiCmd->getDataDword0());
context->freeMem(hostAlloc);
}
@@ -2196,9 +2196,9 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT
ASSERT_EQ(1u, regularCmdList->inOrderPatchCmds.size());
if (NonPostSyncWalkerMatcher::isMatched<productFamily>()) {
EXPECT_EQ(InOrderPatchCommandTypes::CmdType::Sdi, regularCmdList->inOrderPatchCmds[0].cmdType);
EXPECT_EQ(InOrderPatchCommandHelpers::PatchCmdType::Sdi, regularCmdList->inOrderPatchCmds[0].patchCmdType);
} else {
EXPECT_EQ(InOrderPatchCommandTypes::CmdType::Walker, regularCmdList->inOrderPatchCmds[0].cmdType);
EXPECT_EQ(InOrderPatchCommandHelpers::PatchCmdType::Walker, regularCmdList->inOrderPatchCmds[0].patchCmdType);
}
GenCmdList cmdList;
@@ -2215,6 +2215,79 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateEventWhenWaitingFromRegularCmdListT
EXPECT_NE(cmdList.end(), walkerItor);
}
// Checks the MI_SEMAPHORE_WAIT value programmed when an immediate command list waits on an
// event signaled by a regular command list, after the regular list was executed 0, 1, 2 and 3 times.
HWTEST2_F(InOrderCmdListTests, givenEventGeneratedByRegularCmdListWhenWaitingFromImmediateThenUseSubmissionCounter, IsAtLeastSkl) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
ze_command_queue_desc_t desc = {};
auto mockCmdQHw = makeZeUniquePtr<MockCommandQueueHw<gfxCoreFamily>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &desc);
mockCmdQHw->initialize(true, false, false);
auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
auto immCmdList = createImmCmdList<gfxCoreFamily>();
auto regularCmdListHandle = regularCmdList->toHandle();
// Commands are inspected in the immediate list's stream, starting from `offset`.
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
auto offset = cmdStream->getUsed();
auto eventPool = createEvents<FamilyType>(1, false);
auto eventHandle = events[0]->toHandle();
// Four kernels are appended to the regular list; only the third one signals the event.
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, eventHandle, 0, nullptr, launchParams, false);
// Counter value captured right after the append that signals the event.
uint64_t expectedCounterValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter;
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
regularCmdList->close();
// Counter value of the closed list (after all appends); used below as the per-execution increment.
uint64_t expectedCounterAppendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter;
// Parses the commands emitted since `offset`, locates the semaphore that waits on the regular
// list's counter allocation and compares its data dword with expectedValue.
auto verifySemaphore = [&](uint64_t expectedValue) {
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));
auto semaphoreItor = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), semaphoreItor);
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
ASSERT_NE(nullptr, semaphoreCmd);
// The first semaphore may target the immediate list's own counter allocation; in that case step past it.
// NOTE(review): this assumes the very next command is another MI_SEMAPHORE_WAIT and that the
// incremented iterator is not cmdList.end() - neither is checked before dereferencing; confirm.
if (semaphoreCmd->getSemaphoreGraphicsAddress() == immCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress()) {
// skip implicit dependency
semaphoreItor++;
semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*semaphoreItor);
ASSERT_NE(nullptr, semaphoreCmd);
}
EXPECT_EQ(expectedValue, semaphoreCmd->getSemaphoreDataDword());
EXPECT_EQ(regularCmdList->inOrderExecInfo->inOrderDependencyCounterAllocation.getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress());
};
// 0 Execute calls
// Regular list never executed: the wait value is the value recorded at append time.
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false);
verifySemaphore(expectedCounterValue);
// 1 Execute call
// After the first execution the expected wait value is still the recorded one.
offset = cmdStream->getUsed();
mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false);
verifySemaphore(expectedCounterValue);
// 2 Execute calls
// Second execution: the expected wait value grows by one full-list counter value.
offset = cmdStream->getUsed();
mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false);
verifySemaphore(expectedCounterValue + expectedCounterAppendValue);
// 3 Execute calls
// Third execution: the expected wait value grows by two full-list counter values.
offset = cmdStream->getUsed();
mockCmdQHw->executeCommandLists(1, &regularCmdListHandle, nullptr, false);
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 1, &eventHandle, launchParams, false);
verifySemaphore(expectedCounterValue + (expectedCounterAppendValue * 2));
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingKernelSplitThenDontSignalFromWalker, IsAtLeastXeHpCore) {
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
@@ -2610,7 +2683,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
using POSTSYNC_DATA = typename FamilyType::POSTSYNC_DATA;
auto immCmdList = createImmCmdList<gfxCoreFamily>();
immCmdList->inOrderDependencyCounter = std::numeric_limits<uint32_t>::max() - 1;
immCmdList->inOrderExecInfo->inOrderDependencyCounter = std::numeric_limits<uint32_t>::max() - 1;
auto cmdStream = immCmdList->getCmdContainer().getCommandStream();
@@ -2668,7 +2741,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflo
EXPECT_EQ(1u, sdiCmd->getDataDword0());
}
EXPECT_EQ(expectedCounter, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(expectedCounter, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(offset, immCmdList->inOrderAllocationOffset);
EXPECT_EQ(expectedCounter, events[0]->inOrderExecSignalValue);
@@ -2756,7 +2829,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto offset = cmdStream->getUsed();
@@ -2820,7 +2893,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto offset = cmdStream->getUsed();
@@ -2856,7 +2929,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendBarrierWitho
immCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, immCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto offset = cmdStream->getUsed();
@@ -3404,10 +3477,10 @@ HWTEST2_F(MultiTileInOrderCmdListTests, whenUsingRegularCmdListThenAddWalkerToPa
walkerFromParser2 = genCmdCast<COMPUTE_WALKER *>(*itor);
}
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto verifyPatching = [&](uint64_t executionCounter) {
auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter;
auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter;
EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData());
EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData());
@@ -3629,10 +3702,10 @@ HWTEST2_F(InOrderRegularCmdListTests, whenUsingRegularCmdListThenAddCmdsToPatch,
sdiFromParser2 = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
}
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto verifyPatching = [&](uint64_t executionCounter) {
auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter;
auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter;
EXPECT_EQ(1u + appendValue, sdiFromContainer1->getDataDword0());
EXPECT_EQ(1u + appendValue, sdiFromParser1->getDataDword0());
@@ -3716,10 +3789,10 @@ HWTEST2_F(InOrderRegularCmdListTests, whenUsingRegularCmdListThenAddWalkerToPatc
walkerFromParser2 = genCmdCast<COMPUTE_WALKER *>(*itor);
}
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
auto verifyPatching = [&](uint64_t executionCounter) {
auto appendValue = regularCmdList->inOrderDependencyCounter * executionCounter;
auto appendValue = regularCmdList->inOrderExecInfo->inOrderDependencyCounter * executionCounter;
EXPECT_EQ(1u + appendValue, walkerFromContainer1->getPostSync().getImmediateData());
EXPECT_EQ(1u + appendValue, walkerFromParser1->getPostSync().getImmediateData());
@@ -3754,9 +3827,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
size_t offset = cmdStream->getUsed();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(1u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
{
GenCmdList cmdList;
@@ -3781,7 +3854,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
offset = cmdStream->getUsed();
regularCmdList->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
{
GenCmdList cmdList;
@@ -3809,7 +3882,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
*hostAddr = 0x1234;
regularCmdList->reset();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
EXPECT_EQ(0u, *hostAddr);
}
@@ -3829,7 +3902,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
size_t offset = cmdStream->getUsed();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderExecInfo->inOrderDependencyCounter);
EXPECT_NE(nullptr, regularCmdList->inOrderExecInfo.get());
constexpr size_t size = 128 * sizeof(uint32_t);