feature: enable in-order sync allocation for regular cmd lists

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-09-05 16:54:37 +00:00
committed by Compute-Runtime-Automation
parent 6648065703
commit c8a3d7d268
7 changed files with 194 additions and 87 deletions

View File

@@ -329,8 +329,8 @@ struct CommandListCoreFamily : CommandListImp {
virtual bool isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { return false; }
virtual void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) {}
bool isInOrderEventWaitRequired(const Event &event) const;
virtual bool useCounterAllocationForInOrderMode() const { return false; }
void handleInOrderImplicitDependencies(bool relaxedOrderingAllowed);
virtual void handleInOrderDependencyCounter();
};
template <PRODUCT_FAMILY gfxProductFamily>

View File

@@ -142,9 +142,35 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
inOrderDependencyCounter = 0;
inOrderAllocationOffset = 0;
if (inOrderDependencyCounterAllocation) {
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
}
return ZE_RESULT_SUCCESS;
}
// Advances the in-order dependency counter after an append and keeps the counter
// allocation in the residency container.
//
// When the next counter value would equal the uint32_t maximum, the routine programs a
// wait for that value at the current offset, restarts the counter from zero, moves to the
// next slot of the counter allocation and signals the first value there.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handleInOrderDependencyCounter() {
    const bool counterAboutToOverflow = (this->inOrderDependencyCounter + 1) == std::numeric_limits<uint32_t>::max();

    if (counterAboutToOverflow) {
        // Wait for the last value that will be used at the current offset.
        CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter + 1, this->inOrderAllocationOffset, false);
        this->inOrderDependencyCounter = 0;

        // multitile immediate writes are uint64_t aligned
        const uint32_t slotStride = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
        this->inOrderAllocationOffset += slotStride;

        // Require room for one more slot beyond the new offset inside the allocation.
        UNRECOVERABLE_IF(this->inOrderAllocationOffset + slotStride >= this->inOrderDependencyCounterAllocation->getUnderlyingBufferSize());

        // write 1 on new offset
        CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter();
    }

    ++this->inOrderDependencyCounter;
    this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::handlePostSubmissionState() {
this->commandContainer.getResidencyContainer().clear();
@@ -334,6 +360,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(ze_kernel_h
auto res = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), threadGroupDimensions,
event, launchParams);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
addToMappedEventList(event);
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::CallNameEndTag>(
@@ -370,6 +401,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandle), launchKernelArgs,
event, launchParams);
addToMappedEventList(event);
if (this->inOrderExecutionEnabled) {
handleInOrderDependencyCounter();
}
return ret;
}
@@ -402,6 +437,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
addToMappedEventList(event);
appendSignalEventPostWalker(event);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
return ret;
}
@@ -486,8 +525,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendEventReset(ze_event_hand
}
}
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -524,8 +564,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
appendSignalEventPostWalker(signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
@@ -679,6 +720,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(Event::fromHandle(hEvent));
if (this->inOrderExecutionEnabled) {
handleInOrderDependencyCounter();
}
return status;
}
@@ -838,6 +884,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
addFlushRequiredCommand(allocationStruct.needsFlush, event);
if (this->inOrderExecutionEnabled) {
handleInOrderDependencyCounter();
}
return ret;
}
@@ -981,6 +1031,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
event, numWaitEvents, phWaitEvents,
launchParams, relaxedOrderingDispatch);
addToMappedEventList(event);
if (this->inOrderExecutionEnabled) {
handleInOrderDependencyCounter();
}
return status;
}
@@ -1423,12 +1478,18 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
addToMappedEventList(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed)) {
if (!signalEvent && !isCopyOnly()) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
if (this->inOrderExecutionEnabled) {
if (launchParams.isKernelSplitOperation || inOrderCopyOnlySignalingAllowed) {
if (!signalEvent && !isCopyOnly()) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter();
}
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
handleInOrderDependencyCounter();
}
appendSignalInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -1456,7 +1517,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch,
bool forceDisableCopyOnlyInOrderSignaling) {
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling;
const bool inOrderCopyOnlySignalingAllowed = this->inOrderExecutionEnabled && !forceDisableCopyOnlyInOrderSignaling && isCopyOnly();
NEO::Device *neoDevice = device->getNEODevice();
uint32_t callId = 0;
@@ -1516,8 +1577,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
addToMappedEventList(signalEvent);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
if (this->inOrderExecutionEnabled) {
if (inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
}
if (!isCopyOnly() || inOrderCopyOnlySignalingAllowed) {
handleInOrderDependencyCounter();
}
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -1939,12 +2006,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
if (this->inOrderExecutionEnabled) {
if (launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
}
appendSignalInOrderDependencyCounter();
}
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -2005,8 +2076,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
appendSignalEventPostWalker(signalEvent);
if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) {
if (isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
}
return ZE_RESULT_SUCCESS;
@@ -2173,16 +2245,12 @@ void CommandListCoreFamily<gfxCoreFamily>::handleInOrderImplicitDependencies(boo
NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers<GfxFamily>(*commandContainer.getCommandStream());
}
if (useCounterAllocationForInOrderMode()) {
if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) {
CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed);
} else if (!isCopyOnly()) {
appendComputeBarrierCommand();
}
}
if (cmdListType == TYPE_REGULAR && this->inOrderExecutionEnabled && !hasInOrderDependencies) {
inOrderDependencyCounter++; // First append is without dependencies. Increment counter to program barrier on next calls.
}
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -2231,8 +2299,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync, false);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
if (NEO::DebugManager.flags.EnableSWTags.get()) {
@@ -2251,7 +2320,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
UNRECOVERABLE_IF(this->cmdListType == CommandListType::TYPE_REGULAR);
commandContainer.addToResidencyContainer(dependencyCounterAllocation);
@@ -2345,8 +2414,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
commandContainer.addToResidencyContainer(this->csr->getTagAllocation());
}
if (signalInOrderCompletion && useCounterAllocationForInOrderMode()) {
if (signalInOrderCompletion) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
makeResidentDummyAllocation();
@@ -2364,8 +2434,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
uint32_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
@@ -2501,8 +2569,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(
appendSignalEventPostWalker(signalEvent);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
auto allocationStruct = getAlignedAllocationData(this->device, dstptr, sizeof(uint64_t), false);
@@ -2997,7 +3066,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendEventForProfiling(signalEvent, true);
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
} else if (isCopyOnly()) {
NEO::MiFlushArgs args{this->dummyBlitWa};
@@ -3018,6 +3087,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
addToMappedEventList(signalEvent);
appendSignalEventPostWalker(signalEvent);
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}
@@ -3139,12 +3213,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
}
UNRECOVERABLE_IF(srcAllocationStruct.alloc == nullptr);
appendEventForProfiling(signalEvent, true);
if (this->inOrderExecutionEnabled) {
handleInOrderImplicitDependencies(false);
}
appendEventForProfiling(signalEvent, true);
commandContainer.addToResidencyContainer(srcAllocationStruct.alloc);
uint64_t gpuAddress = static_cast<uint64_t>(srcAllocationStruct.alignedAllocationPtr);
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), gpuAddress, data, comparator, false, false, false);
@@ -3160,6 +3234,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
appendSignalEventPostWalker(signalEvent);
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}
@@ -3203,6 +3282,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWriteToMemory(void *desc
args);
}
if (this->inOrderExecutionEnabled) {
appendSignalInOrderDependencyCounter();
handleInOrderDependencyCounter();
}
return ZE_RESULT_SUCCESS;
}

View File

@@ -192,9 +192,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
ze_result_t hostSynchronize(uint64_t timeout, TaskCountType taskCount, bool handlePostWaitOperations);
bool hasStallingCmdsForRelaxedOrdering(uint32_t numWaitEvents, bool relaxedOrderingDispatch) const;
void setupFlushMethod(const NEO::RootDeviceEnvironment &rootDeviceEnvironment) override;
void handleInOrderDependencyCounter();
bool isSkippingInOrderBarrierAllowed(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) const;
bool useCounterAllocationForInOrderMode() const override { return true; }
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
MOCKABLE_VIRTUAL void checkAssert();

View File

@@ -883,10 +883,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
auto signalEvent = Event::fromHandle(hSignalEvent);
if (inputRet == ZE_RESULT_SUCCESS) {
if (isInOrderExecutionEnabled()) {
handleInOrderDependencyCounter();
}
if (this->isFlushTaskSubmissionEnabled) {
if (signalEvent && (NEO::DebugManager.flags.TrackNumCsrClientsOnSyncPoints.get() != 0)) {
signalEvent->setLatestUsedCmdQueue(this->cmdQImmediate);
@@ -911,28 +907,6 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_res
return inputRet;
}
// Bumps the in-order dependency counter for an immediate command list and makes the
// counter allocation resident.
//
// If the next value would reach the uint32_t maximum, a wait for that value is programmed
// at the current offset, the counter restarts from zero, the allocation offset moves to
// the next slot and the first value is signaled there.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::handleInOrderDependencyCounter() {
    const auto upcomingValue = this->inOrderDependencyCounter + 1;

    if (upcomingValue == std::numeric_limits<uint32_t>::max()) {
        CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, upcomingValue, this->inOrderAllocationOffset, false);
        this->inOrderDependencyCounter = 0;

        // multitile immediate writes are uint64_t aligned
        const uint32_t slotStride = this->partitionCount * static_cast<uint32_t>(sizeof(uint64_t));
        this->inOrderAllocationOffset += slotStride;

        // Require room for one more slot beyond the new offset inside the allocation.
        UNRECOVERABLE_IF(this->inOrderAllocationOffset + slotStride >= this->inOrderDependencyCounterAllocation->getUnderlyingBufferSize());

        // write 1 on new offset
        CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter();
    }

    ++this->inOrderDependencyCounter;
    this->commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation);
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandListCoreFamilyImmediate<gfxCoreFamily>::preferCopyThroughLockedPtr(CpuMemCopyInfo &cpuMemCopyInfo, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
if (NEO::DebugManager.flags.ExperimentalForceCopyThroughLock.get() == 1) {

View File

@@ -296,7 +296,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->dcFlushSupport // dcFlushEnable
};
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation && useCounterAllocationForInOrderMode());
bool inOrderExecSignalRequired = (this->inOrderExecutionEnabled && !launchParams.isKernelSplitOperation);
bool inOrderNonWalkerSignalling = event && (isTimestampEvent || !isInOrderExecEvent);
if (inOrderExecSignalRequired) {

View File

@@ -229,17 +229,15 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
// Enables in-order execution for this command list: allocates the in-order dependency
// counter buffer, zero-fills it and sets inOrderExecutionEnabled.
//
// NOTE(review): each statement below appears twice because this view interleaves the
// pre-change and post-change lines of the hunk. Presumably the TYPE_IMMEDIATE guard and
// its closing brace belong to the pre-change version only (the hunk shrinks from 17 to 15
// lines) - confirm against the actual file before relying on this span.
void CommandListImp::enableInOrderExecution() {
// Enabling in-order execution a second time trips UNRECOVERABLE_IF.
UNRECOVERABLE_IF(inOrderExecutionEnabled);
if (this->cmdListType == TYPE_IMMEDIATE) {
// Local `device` is the NEO device; it shadows the `device` member inside this function.
auto device = this->device->getNEODevice();
auto device = this->device->getNEODevice();
// Request a pageSize64k allocation of type TIMESTAMP_PACKET_TAG_BUFFER for the device's
// root device index and device bitfield.
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);
// A failed allocation trips UNRECOVERABLE_IF; no error code is returned to the caller.
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);
// Zero the whole underlying buffer of the counter allocation.
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
}
memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
inOrderExecutionEnabled = true;
}

View File

@@ -1807,6 +1807,8 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
uint8_t ptr[64] = {};
uint64_t inOrderSyncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
auto verifyPc = [](const GenCmdList::iterator &iterator) {
@@ -1815,9 +1817,21 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
ASSERT_NE(nullptr, pcCmd);
};
auto verifySdi = [](GenCmdList::reverse_iterator rIterator) {
auto verifySdi = [&inOrderSyncVa](GenCmdList::reverse_iterator rIterator, GenCmdList::reverse_iterator rEnd, uint32_t signalValue) {
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*rIterator);
EXPECT_EQ(nullptr, sdiCmd);
while (sdiCmd == nullptr) {
sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*(++rIterator));
if (rIterator == rEnd) {
break;
}
}
ASSERT_NE(nullptr, sdiCmd);
EXPECT_EQ(inOrderSyncVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(signalValue, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
};
{
@@ -1831,7 +1845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 2);
}
{
@@ -1847,7 +1861,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 3);
}
{
@@ -1861,7 +1875,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 4);
}
{
@@ -1877,7 +1891,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 5);
}
{
@@ -1893,7 +1907,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderRegularCmdListWhenProgrammingNonKerne
(cmdStream->getUsed() - offset)));
verifyPc(cmdList.begin());
verifySdi(cmdList.rbegin());
verifySdi(cmdList.rbegin(), cmdList.rend(), 6);
}
}
@@ -2275,7 +2289,17 @@ HWTEST2_F(InOrderCmdListTests, givenRegularInOrderCmdListWhenProgrammingAppendWa
ASSERT_NE(nullptr, pcCmd);
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
uint64_t syncVa = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(syncVa, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(3u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) {
@@ -3171,7 +3195,9 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(1u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
@@ -3180,7 +3206,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
offset = cmdStream->getUsed();
regularCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(1u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(2u, regularCmdList->inOrderDependencyCounter);
{
GenCmdList cmdList;
@@ -3195,20 +3221,25 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*walkerItor);
auto &postSync = walkerCmd->getPostSync();
EXPECT_EQ(POSTSYNC_DATA::OPERATION_NO_WRITE, postSync.getOperation());
EXPECT_EQ(POSTSYNC_DATA::OPERATION_WRITE_IMMEDIATE_DATA, postSync.getOperation());
EXPECT_EQ(2u, postSync.getImmediateData());
EXPECT_EQ(regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), postSync.getDestinationAddress());
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
}
regularCmdList->inOrderAllocationOffset = 123;
auto hostAddr = static_cast<uint32_t *>(regularCmdList->inOrderDependencyCounterAllocation->getUnderlyingBuffer());
*hostAddr = 0x1234;
regularCmdList->reset();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
EXPECT_EQ(0u, *hostAddr);
}
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) {
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenUpdateCounterAllocation, IsAtLeastXeHpCore) {
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
auto eventPool = createEvents<FamilyType>(1, true);
@@ -3224,7 +3255,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
size_t offset = cmdStream->getUsed();
EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
EXPECT_NE(nullptr, regularCmdList->inOrderDependencyCounterAllocation);
constexpr size_t size = 128 * sizeof(uint32_t);
auto data = allocHostMem(size);
@@ -3245,8 +3276,8 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
ptrOffset(cmdStream->getCpuBase(), offset),
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(2u, sdiCmds.size());
}
offset = copyOnlyCmdStream->getUsed();
@@ -3259,7 +3290,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
(copyOnlyCmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
}
context->freeMem(data);
@@ -3288,7 +3319,18 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(1u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
offset = cmdStream->getUsed();
@@ -3301,12 +3343,23 @@ HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingReg
ptrOffset(cmdStream->getCpuBase(), offset),
(cmdStream->getUsed() - offset)));
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), sdiItor);
auto copyCmd = genCmdCast<XY_COPY_BLT *>(*cmdList.begin());
EXPECT_NE(nullptr, copyCmd);
auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
EXPECT_NE(cmdList.end(), sdiItor);
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItor);
ASSERT_NE(nullptr, sdiCmd);
auto gpuAddress = regularCmdList->inOrderDependencyCounterAllocation->getGpuAddress();
EXPECT_EQ(gpuAddress, sdiCmd->getAddress());
EXPECT_EQ(0u, sdiCmd->getStoreQword());
EXPECT_EQ(2u, sdiCmd->getDataDword0());
EXPECT_EQ(0u, sdiCmd->getDataDword1());
}
alignedFree(alignedPtr);