feature: enable in-order sync allocation for regular cmd lists 2

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz 2023-09-13 10:02:22 +00:00 committed by Compute-Runtime-Automation
parent 3eb98163a8
commit efddaa1251
7 changed files with 182 additions and 87 deletions

View File

@ -327,8 +327,8 @@ struct CommandListCoreFamily : CommandListImp {
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
bool isInOrderEventWaitRequired(const Event &event) const;
virtual bool useCounterAllocationForInOrderMode() const { return false; }
void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
virtual void handleInOrderDependencyCounter();
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@ -141,9 +141,35 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
inOrderDependencyCounter = 0;
inOrderAllocationOffset = 0;
if (inOrderDependencyCounterAllocation) {
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
}
return ZE_RESULT_SUCCESS;
}
/**
 * Advances the in-order dependency counter after an append call.
 *
 * If the next counter value would equal the uint32_t maximum, the method first
 * appends a wait for that value on the current slot, resets the counter to 0,
 * moves the allocation offset forward by one slot and appends a signal on the
 * new slot. The counter allocation is then added to the residency container.
 */
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter() {
    const auto upcomingValue = inOrderDependencyCounter + 1;
    const bool counterExhausted = (upcomingValue == std::numeric_limits<uint32_t>::max());

    if (counterExhausted) {
        // Wait for the last value on the current slot before abandoning it.
        CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, upcomingValue, inOrderAllocationOffset, false);

        inOrderDependencyCounter = 0;

        // Slot size: multitile immediate writes are uint64_t aligned.
        const uint32_t slotStride = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
        inOrderAllocationOffset += slotStride;

        // Abort when the advanced offset plus one more slot reaches the end of the buffer.
        UNRECOVERABLE_IF(inOrderAllocationOffset + slotStride >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize());

        // Counter is 0 here, so this writes 1 at the new offset.
        CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter();
    }

    ++inOrderDependencyCounter;

    this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handlePostSubmissionState() {
this->commandContainer.getResidencyContainer().clear();
@ -327,6 +353,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
event, launchParams);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
addToMappedEventList(event);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
@ -363,6 +394,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams);
addToMappedEventList(event);
if (this->inOrderExecutionEnabled) {
handleInOrderDependencyCounter();
}
return ret;
}
@ -395,6 +430,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
addToMappedEventList(event);
appendSignalEventPostWalker(event);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
return ret;
}
@ -479,8 +518,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
}
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -517,8 +557,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
appendSignalEventPostWalker(signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
@ -672,6 +713,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(Event::fromHandle(hEvent));
return status;
}
@ -974,6 +1016,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
return status;
}
@ -1416,12 +1459,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
if (!signalEvent && !isCopyOnly()) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
if (this->inOrderExecutionEnabled) {
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) {
if (!signalEvent && !isCopyOnly()) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter();
}
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
handleInOrderDependencyCounter();
}
appendSignalInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -1449,7 +1498,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
bool forceDisableCopyOnlyInOrderSignaling) {
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling;
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@ -1509,8 +1558,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
addToMappedEventList(signalEvent);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
if (this->inOrderExecutionEnabled) {
if (inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
}
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
handleInOrderDependencyCounter();
}
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -1932,12 +1987,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
if (this->inOrderExecutionEnabled) {
if (launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter();
}
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -1998,8 +2057,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
appendSignalEventPostWalker(signalEvent);
if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) {
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
}
return ZE_RESULT_SUCCESS;
@ -2166,16 +2226,12 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
if (useCounterAllocationForInOrderMode()) {
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed);
} else if (!isCopyOnly()) {
appendComputeBarrierCommand();
}
}
if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) {
inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls.
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
@ -2224,8 +2280,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@ -2244,7 +2301,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
UNRECOVERABLE_IF(this->cmdListType == CommandListType::TYPE_REGULAR);
commandContainer.addToResidencyContainer(dependencyCounterAllocation);
@ -2338,8 +2395,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
}
if (signalInOrderCompletion && useCounterAllocationForInOrderMode()) {
if (signalInOrderCompletion) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
makeResidentDummyAllocation();
@ -2357,8 +2415,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
uint32_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
@ -2494,8 +2550,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendSignalEventPostWalker(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false);
@ -2988,7 +3045,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendEventForProfiling(signalEvent, true);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
} else if (isCopyOnly()) {
NEO::MiFlushArgs args{this->dummyBlitWa};
@ -3009,6 +3066,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}
@ -3130,12 +3192,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
}
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
appendEventForProfiling(signalEvent, true);
if (this->inOrderExecutionEnabled) {
handleInOrderImplicitDependencies(false);
}
appendEventForProfiling(signalEvent, true);
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false, false);
@ -3151,6 +3213,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent);
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}
@ -3194,6 +3261,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
args);
}
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}

View File

@ -189,9 +189,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
void handleInOrderDependencyCounter();
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
bool useCounterAllocationForInOrderMode() const override { return true; }
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
MOCKABLE_VIRTUAL void checkAssert();

View File

@ -876,10 +876,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
auto signalEvent = Event::fromHandle(hSignalEvent);
if (inputRet == ZE_RESULT_SUCCESS) {
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
if (this->isFlushTaskSubmissionEnabled) {
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
@ -904,28 +900,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
return inputRet;
}
// Increments the in-order dependency counter for the immediate command list and
// adds the counter allocation to the residency container. When the next counter
// value would equal the uint32_t maximum, the counter is restarted on the next
// slot of the allocation (see the branch below).
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderDependencyCounter() {
// Overflow guard: the next value would hit the uint32_t limit.
if ((inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max()) {
// Append a wait for the last value on the current slot before moving on.
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(inOrderDependencyCounterAllocation, inOrderDependencyCounter + 1, inOrderAllocationOffset, false);
inOrderDependencyCounter = 0;
// multitile immediate writes are uint64_t aligned
uint32_t offset = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
inOrderAllocationOffset += offset;
// Abort when the advanced offset plus one more slot reaches the end of the buffer.
UNRECOVERABLE_IF(inOrderAllocationOffset + offset >= inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter(); // write 1 on new offset
}
inOrderDependencyCounter++;
this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) {

View File

@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->dcFlushSupport // dcFlushEnable
};
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode());
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation);
bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent);
if (inOrderExecSignalRequired) {

View File

@ -228,17 +228,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
void CommandListImp::enableInOrderExecution() {
UNRECOVERABLE_IF(inOrderExecutionEnabled);
if (this->cmdListType == TYPE_IMMEDIATE) {
auto device = this->device->getNEODevice();
auto device = this->device->getNEODevice();
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
}
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
inOrderExecutionEnabled = true;
}

View File

@ -1807,6 +1807,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
uint8_t ptr[64] = {};
uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto verifyPc = [](const GenCmdList::iterator &iterator) {
@ -1815,9 +1817,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
ASSERT_NE(nullptr, pcCmd);
};
auto verifySdi = [](GenCmdList::reverse_iterator rIterator) {
auto verifySdi = [&inOrderSyncVa](GenCmdList::reverse_iterator rIterator, GenCmdList::reverse_iterator rEnd, uint32_t signalValue) {
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rIterator);
EXPECT_EQ(nullptr, sdiCmd);
// Scan backwards for the nearest MI_STORE_DATA_IMM. The iterator is compared
// against rEnd BEFORE it is dereferenced; dereferencing the reverse end
// iterator is undefined behaviour. If nothing is found, sdiCmd stays nullptr
// and the ASSERT_NE that follows reports the failure.
while (sdiCmd == nullptr) {
    if (++rIterator == rEnd) {
        break;
    }
    sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rIterator);
}
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(signalValue, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
};
{
@ -1831,7 +1845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 2);
}
{
@ -1847,7 +1861,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 3);
}
{
@ -1861,7 +1875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 4);
}
{
@ -1877,7 +1891,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 5);
}
{
@ -1893,7 +1907,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 6);
}
}
@ -2275,7 +2289,17 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
ASSERT_NE(nullptr, pcCmd);
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(3u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) {
@ -3171,7 +3195,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(1u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
@ -3180,7 +3206,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
offset = cmdStream->getUsed();
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
{
GenCmdList cmdList;
@ -3195,20 +3221,25 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(2u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
}
regularCmdList->inOrderAllocationOffset = 123;
auto hostAddr = static_cast<uint32_t *>(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddr = 0x1234;
regularCmdList->reset();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
EXPECT_EQ(0u, *hostAddr);
}
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) {
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeHpCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
auto eventPool = createEvents<FamilyType>(1, true);
@ -3224,7 +3255,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
size_t offset = cmdStream->getUsed();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
constexpr size_t size = 128 * sizeof(uint32_t);
auto data = allocHostMem(size);
@ -3245,8 +3276,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
ptrOffset(cmdStream->getCpuBase(), offset),
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(2u, sdiCmds.size());
}
offset = copyOnlyCmdStream->getUsed();
@ -3259,7 +3290,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
(copyOnlyCmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
}
context->freeMem(data);
@ -3288,7 +3319,18 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
offset = cmdStream->getUsed();
@ -3301,12 +3343,23 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
ptrOffset(cmdStream->getCpuBase(), offset),
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
auto copyCmd = genCmdCast<XY_COPY_BLT *>(*cmdList.begin());
EXPECT_NE(nullptr, copyCmd);
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
alignedFree(alignedPtr);