feature: extend in-order execution support to regular command lists

Related-To: NEO-7966

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2023-08-08 14:49:34 +00:00
committed by Compute-Runtime-Automation
parent fc1f45b630
commit b09357470a
4 changed files with 72 additions and 10 deletions

View File

@@ -1484,7 +1484,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *d
addToMappedEventList(signalEvent);
addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent);
if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) {
appendSignalInOrderDependencyCounter();
}
@@ -1901,7 +1901,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket);
addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent);
if (this->inOrderExecutionEnabled && launchParams.isKernelSplitOperation) {
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) {
if (!signalEvent) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(*commandContainer.getCommandStream(), args);
@@ -1967,7 +1967,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
appendSignalEventPostWalker(signalEvent);
if (isInOrderExecutionEnabled()) {
if (isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) {
appendSignalInOrderDependencyCounter();
}
}
@@ -2181,7 +2181,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(ze_event_han
bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet());
dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync);
if (this->inOrderExecutionEnabled) {
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
appendSignalInOrderDependencyCounter();
}
@@ -2201,6 +2201,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(NEO::Gr
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
UNRECOVERABLE_IF(waitValue > std::numeric_limits<uint32_t>::max());
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
commandContainer.addToResidencyContainer(dependencyCounterAllocation);
@@ -2313,6 +2314,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(uint32_t nu
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendSignalInOrderDependencyCounter() {
UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode());
uint32_t signalValue = this->inOrderDependencyCounter + 1;
uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset;
@@ -2942,7 +2945,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBarrier(ze_event_handle_
appendEventForProfiling(signalEvent, true);
if (this->inOrderExecutionEnabled) {
if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) {
appendSignalInOrderDependencyCounter();
} else if (isCopyOnly()) {
NEO::MiFlushArgs args{this->dummyBlitWa};

View File

@@ -229,15 +229,17 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s
// Switches this command list into in-order execution mode.
//
// Must be called at most once per command list: a second call trips UNRECOVERABLE_IF.
// Only immediate command lists (cmdListType == TYPE_IMMEDIATE) get a dependency counter
// allocation here; any other command list type is just flagged as in-order and leaves
// inOrderDependencyCounterAllocation untouched.
void CommandListImp::enableInOrderExecution() {
    UNRECOVERABLE_IF(inOrderExecutionEnabled);

    if (this->cmdListType == TYPE_IMMEDIATE) {
        // The device is needed only to create the counter allocation, so it is fetched inside the branch.
        auto device = this->device->getNEODevice();

        NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()};

        // Allocate exactly once: a second allocation would overwrite the pointer to the first one.
        inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties);

        UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation);

        // Zero the whole underlying buffer before the allocation is used.
        memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize());
    }

    inOrderExecutionEnabled = true;
}

View File

@@ -77,6 +77,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::initialize;
using BaseClass::inOrderAllocationOffset;
using BaseClass::inOrderDependencyCounter;
using BaseClass::inOrderDependencyCounterAllocation;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::isRelaxedOrderingDispatchAllowed;
using BaseClass::isSyncModeQueue;

View File

@@ -2758,6 +2758,62 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL
EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset);
}
// Checks that a regular in-order command list starts with a zero dependency counter and no counter
// allocation, and that the append calls exercised below leave no MI_STORE_DATA_IMM in either the
// first command list's stream or the copy-only command list's stream.
HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) {
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;

    constexpr size_t bufferSize = 128 * sizeof(uint32_t);
    ze_copy_region_t copyRegion = {0, 0, 0, 1, 1, 1};

    // eventPool is not referenced again below; presumably it keeps events[0] alive - TODO confirm.
    auto eventPool = createEvents<FamilyType>(1, true);
    auto eventHandle = events[0]->toHandle();

    auto regularCmdList = createRegularCmdList<gfxCoreFamily>(false);
    auto regularCopyOnlyCmdList = createRegularCmdList<gfxCoreFamily>(true);

    auto computeStream = regularCmdList->getCmdContainer().getCommandStream();
    auto copyStream = regularCopyOnlyCmdList->getCmdContainer().getCommandStream();

    // Remember where the stream ends now so that only the commands appended below are parsed.
    size_t computeStreamStart = computeStream->getUsed();

    EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter);
    EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation);

    auto hostBuffer = allocHostMem(bufferSize);

    regularCmdList->appendMemoryCopyRegion(hostBuffer, &copyRegion, 1, 1, hostBuffer, &copyRegion, 1, 1, nullptr, 0, nullptr, false, false);
    regularCmdList->appendMemoryFill(hostBuffer, hostBuffer, 1, bufferSize, nullptr, 0, nullptr, false);
    regularCmdList->appendSignalEvent(eventHandle);
    regularCmdList->appendBarrier(nullptr, 1, &eventHandle);

    {
        auto parseBegin = ptrOffset(computeStream->getCpuBase(), computeStreamStart);
        const auto parseLength = computeStream->getUsed() - computeStreamStart;

        GenCmdList parsedCommands;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, parseBegin, parseLength));

        // No MI_STORE_DATA_IMM may have been programmed by any of the calls above.
        auto storeDataImmIt = find<MI_STORE_DATA_IMM *>(parsedCommands.begin(), parsedCommands.end());
        EXPECT_EQ(parsedCommands.end(), storeDataImmIt);
    }

    // Same check for a memory fill on the copy-only command list.
    size_t copyStreamStart = copyStream->getUsed();
    regularCopyOnlyCmdList->appendMemoryFill(hostBuffer, hostBuffer, 1, bufferSize, nullptr, 0, nullptr, false);

    {
        auto parseBegin = ptrOffset(copyStream->getCpuBase(), copyStreamStart);
        const auto parseLength = copyStream->getUsed() - copyStreamStart;

        GenCmdList parsedCommands;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, parseBegin, parseLength));

        auto storeDataImmIt = find<MI_STORE_DATA_IMM *>(parsedCommands.begin(), parsedCommands.end());
        EXPECT_EQ(parsedCommands.end(), storeDataImmIt);
    }

    context->freeMem(hostBuffer);
}
using InOrderRegularCopyOnlyCmdListTests = InOrderCmdListTests;
HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontProgramBarriers, IsAtLeastXeHpCore) {