RelaxedOrdering: Improve dependencies tracking

Avoid unnecessary scheduler programming
Related-To: NEO-7458

Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2022-11-26 20:10:32 +00:00
committed by Compute-Runtime-Automation
parent ad6237478f
commit 3f962bf3e8
23 changed files with 734 additions and 161 deletions

View File

@@ -124,12 +124,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
uint32_t numWaitEvents,
ze_event_handle_t *waitEventHandles) override;
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds);
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
void checkAvailableSpace();
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent);
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent);
void createLogicalStateHelper() override {}
NEO::LogicalStateHelper *getLogicalStateHelper() const override;

View File

@@ -69,7 +69,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
NEO::DispatchFlags dispatchFlags(
{}, // csrDependencies
nullptr, // barrierTimestampPacketNodes
@@ -100,7 +100,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
false, // textureCacheFlush
hasStallingCmds // hasStallingCmds
hasStallingCmds, // hasStallingCmds
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
);
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
@@ -260,7 +261,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
hSignalEvent, numWaitEvents, phWaitEvents,
launchParams);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -273,7 +274,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -289,7 +290,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, hSignalEvent);
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -323,7 +324,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
numWaitEvents, phWaitEvents);
}
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -366,7 +367,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
hSignalEvent, numWaitEvents, phWaitEvents);
}
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -381,7 +382,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -393,7 +394,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
checkAvailableSpace();
}
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
return flushImmediate(ret, true, true, hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -405,7 +406,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
checkAvailableSpace();
}
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
return flushImmediate(ret, true, true, hSignalEvent);
return flushImmediate(ret, true, true, false, hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -431,7 +432,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
} else {
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
}
return flushImmediate(ret, false, false, nullptr);
return flushImmediate(ret, false, false, false, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -448,7 +449,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents);
this->dependenciesPresent = true;
return flushImmediate(ret, true, true, nullptr);
return flushImmediate(ret, true, true, (numEvents > 0), nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -461,7 +462,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, hSignalEvent);
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -497,7 +498,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -515,7 +516,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -533,7 +534,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -547,7 +548,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
checkAvailableSpace();
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
return flushImmediate(ret, true, true, hSignalEvent);
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -560,14 +561,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
checkAvailableSpace();
}
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles);
return flushImmediate(ret, true, false, hSignalEvent);
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent) {
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
if (inputRet == ZE_RESULT_SUCCESS) {
if (this->isFlushTaskSubmissionEnabled) {
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds);
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
} else {
inputRet = executeCommandListImmediate(performMigration);
}

View File

@@ -99,7 +99,7 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false);
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false, false);
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());

View File

@@ -515,7 +515,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
return executeCommandListImmediateReturnValue;
}
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) override {
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) override {
++executeCommandListImmediateWithFlushTaskCalledCount;
return executeCommandListImmediateWithFlushTaskReturnValue;
}

View File

@@ -1068,6 +1068,118 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
driverHandle->releaseImportedPointer(dstPtr);
}
// Verifies that every append* entry point of an immediate command list forwards the
// "has relaxed ordering dependencies" information to the command stream receiver:
// after each call, the flag is checked both in the DispatchFlags recorded by the CSR
// and in the latest flushed BatchBuffer.
// For the entry points that take a wait list, the expected flag value is true only when
// a wait event is passed. appendEventReset, appendSignalEvent and appendPageFaultCopy are
// expected to report false, and appendWaitOnEvents (always called with one event) true.
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
// Synchronous immediate command list on the RenderCompute engine group.
ze_command_queue_desc_t desc = {};
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
ASSERT_NE(nullptr, commandList);
// Single host-visible, kernel-timestamp event; it is used both as the wait-list entry
// and as the target of appendEventReset / appendSignalEvent / appendWaitOnEvents below.
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.count = 1;
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
ze_event_desc_t eventDesc = {};
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
ze_event_handle_t event = nullptr;
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
// Wrap the raw handle so the event object is owned (and released) by this test scope.
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
Mock<::L0::Kernel> kernel;
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
// Small host buffers used as copy/fill source and destination.
uint8_t srcPtr[64] = {};
uint8_t dstPtr[64] = {};
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
// NOTE(review): dstPtr is a 64-byte array but is imported with MemoryConstants::pageSize
// as its size - confirm this is intentional for the mock environment.
driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList->csr);
// Presumably makes the ULT CSR keep a copy of each flushed batch buffer so that
// latestFlushedBatchBuffer can be inspected below - verify against UltCommandStreamReceiver.
ultCsr->recordFlusheBatchBuffer = true;
// Checks the result of the append call just made and compares the relaxed-ordering flag
// seen in the recorded dispatch flags (dispatchFlag) and in the flushed batch buffer (bbFlag).
// Must be invoked right after each append call, since it reads the CSR's most recent state.
auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) {
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies, dispatchFlag);
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies, bbFlag);
};
// Run every append call twice: once with a one-event wait list, once with no wait list.
for (bool hasEventDependencies : {true, false}) {
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, launchParams),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendBarrier(nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
// These three calls take no wait list, so the flag is expected to be false in both iterations.
verifyFlags(commandList->appendEventReset(event), false, false);
verifyFlags(commandList->appendSignalEvent(event), false, false);
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
false, false);
// Always called with one event, independent of hasEventDependencies, so always expected true.
verifyFlags(commandList->appendWaitOnEvents(1, &event), true, true);
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
// Image copy paths are only compiled for families that report sampler support.
if constexpr (FamilyType::supportsSampler) {
// Note: this local 'kernel' shadows the outer Mock kernel within this scope.
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
auto mockBuiltinKernel = static_cast<Mock<::L0::Kernel> *>(kernel);
mockBuiltinKernel->setArgRedescribedImageCallBase = false;
auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
ze_image_desc_t zeDesc = {};
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
image->initialize(device, &zeDesc);
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
}
size_t rangeSizes = 1;
// The storage of dstPtr itself is reinterpreted as the array of range pointers
// (i.e. the bytes of dstPtr are read as pointer values, not the address of dstPtr).
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
verifyFlags(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
}
// The cooperative kernel launch is exercised in its own with/without-wait-list loop.
for (bool hasEventDependencies : {true, false}) {
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
hasEventDependencies, hasEventDependencies);
}
// Undo the importExternalPointer registration made at the start of the test.
driverHandle->releaseImportedPointer(dstPtr);
}
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableFlushTaskSubmission.set(1);

View File

@@ -73,7 +73,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {};
hwInfoConfig.fillScmPropertiesSupportStructure(scmPropertiesSupport);
@@ -102,7 +102,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1;
expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? 0 : -1;
@@ -128,7 +128,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
commandListImmediate.containsAnyKernel = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
}
@@ -139,7 +139,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
}
HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
@@ -151,7 +151,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecu
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
}
HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
@@ -163,7 +163,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExe
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
}
using CommandListTest = Test<DeviceFixture>;
@@ -351,7 +351,7 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
cmdList.csr = event->csr;
event->csr = &mockCommandStreamReceiver;
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, event->toHandle());
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, event->toHandle());
EXPECT_EQ(event->csr, cmdList.csr);
}
@@ -772,7 +772,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandLis
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = true;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
commandList->cmdQImmediate = oldCommandQueue;
}
@@ -791,7 +791,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL
auto oldCommandQueue = commandList->cmdQImmediate;
commandList->cmdQImmediate = &mockCommandQueue;
commandListImmediate.indirectAllocationsAllowed = false;
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
commandList->cmdQImmediate = oldCommandQueue;
}

View File

@@ -804,7 +804,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
kernel->requiresMemoryMigration(), // memoryMigrationRequired
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
false); // hasStallingCmds
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
@@ -1050,7 +1051,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
false, // textureCacheFlush
false); // hasStallingCmds
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();

View File

@@ -80,7 +80,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
false, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
false, // textureCacheFlush
false); // hasStallingCmds
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
@@ -210,7 +211,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
kernel->requiresMemoryMigration(), // memoryMigrationRequired
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
false); // hasStallingCmds
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
@@ -385,7 +387,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
false, // textureCacheFlush
false); // hasStallingCmds
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);

View File

@@ -615,7 +615,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds)};
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds),
dispatchFlags.hasRelaxedOrderingDependencies};
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
@@ -1178,7 +1179,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false};
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false, false};
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
@@ -1290,7 +1291,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true};
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true, false};
this->latestSentTaskCount = taskCount + 1;
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());

View File

@@ -57,7 +57,7 @@ struct DispatchFlags {
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
bool hasStallingCmds) : csrDependencies(csrDependenciesP),
bool hasStallingCmds, bool hasRelaxedOrderingDependencies) : csrDependencies(csrDependenciesP),
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
@@ -86,7 +86,8 @@ struct DispatchFlags {
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP),
textureCacheFlush(textureCacheFlush),
hasStallingCmds(hasStallingCmds){};
hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies){};
CsrDependencies csrDependencies;
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
@@ -119,6 +120,7 @@ struct DispatchFlags {
bool memoryMigrationRequired = false;
bool textureCacheFlush = false;
bool hasStallingCmds = false;
bool hasRelaxedOrderingDependencies = false;
bool disableEUFusion = false;
};

View File

@@ -100,14 +100,15 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset,
size_t chainedBatchBufferStartOffset, uint64_t taskStartAddress, GraphicsAllocation *chainedBatchBuffer,
bool requiresCoherency, bool lowPriority,
QueueThrottle throttle, uint64_t sliceCount,
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds)
bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount,
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds,
bool hasRelaxedOrderingDependencies)
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
requiresCoherency(requiresCoherency), low_priority(lowPriority),
throttle(throttle), sliceCount(sliceCount),
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds) {}
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies) {}
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
flushStamp.reset(new FlushStampTracker(false));

View File

@@ -32,7 +32,8 @@ struct BatchBuffer {
LinearStream *stream,
void *endCmdPtr,
bool useSingleSubdevice,
bool hasStallingCmds);
bool hasStallingCmds,
bool hasRelaxedOrderingDependencies);
BatchBuffer() {}
GraphicsAllocation *commandBufferAllocation = nullptr;
size_t startOffset = 0u;
@@ -52,6 +53,7 @@ struct BatchBuffer {
bool useSingleSubdevice = false;
bool hasStallingCmds = false;
bool hasRelaxedOrderingDependencies = false;
bool ringBufferRestartRequest = false;
};

View File

@@ -114,12 +114,12 @@ class DirectSubmissionHw {
void cpuCachelineFlush(void *ptr, size_t size);
void dispatchSemaphoreSection(uint32_t value, bool firstSubmission);
size_t getSizeSemaphoreSection(bool firstSubmission);
void dispatchSemaphoreSection(uint32_t value);
size_t getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired);
void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr);
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies);
void dispatchStartSection(uint64_t gpuStartAddress);
size_t getSizeStartSection();
@@ -127,10 +127,10 @@ class DirectSubmissionHw {
void dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress);
size_t getSizeSwitchRingBufferSection();
void dispatchRelaxedOrderingQueueStall();
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingQueueStall();
size_t getSizeDispatchRelaxedOrderingQueueStall();
void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
MOCKABLE_VIRTUAL void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections();
void initRelaxedOrderingRegisters();
@@ -138,7 +138,7 @@ class DirectSubmissionHw {
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
size_t getSizeDispatch();
size_t getSizeDispatch(bool relaxedOrderingSchedulerRequired);
void dispatchPrefetchMitigation();
size_t getSizePrefetchMitigation();
@@ -148,7 +148,7 @@ class DirectSubmissionHw {
MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler();
size_t getSizeEnd();
size_t getSizeEnd(bool relaxedOrderingSchedulerRequired);
void dispatchPartitionRegisterConfiguration();
size_t getSizePartitionRegisterConfigurationSection();
@@ -226,6 +226,6 @@ class DirectSubmissionHw {
bool dcFlushRequired = false;
bool relaxedOrderingEnabled = false;
bool relaxedOrderingInitialized = false;
bool firstSubmissionAfterRingStart = true;
bool relaxedOrderingSchedulerRequired = false;
};
} // namespace NEO

View File

@@ -403,7 +403,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
initDiagnostic(submitOnInit);
if (ret && submitOnInit) {
size_t startBufferSize = Dispatcher::getSizePreemption() +
getSizeSemaphoreSection(true);
getSizeSemaphoreSection(false);
Dispatcher::dispatchPreemption(ringCommandStream);
if (this->partitionedMode) {
@@ -431,7 +431,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
dispatchDiagnosticModeSection();
startBufferSize += getDiagnosticModeSection();
}
dispatchSemaphoreSection(currentQueueWorkCount, true);
dispatchSemaphoreSection(currentQueueWorkCount);
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
performDiagnosticMode();
@@ -446,7 +446,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
return true;
}
size_t startSize = getSizeSemaphoreSection(true);
size_t startSize = getSizeSemaphoreSection(false);
if (!this->partitionConfigSet) {
startSize += getSizePartitionRegisterConfigurationSection();
}
@@ -457,7 +457,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
}
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
size_t requiredSize = startSize + getSizeDispatch(false) + getSizeEnd(false);
if (ringCommandStream.getAvailableSpace() < requiredSize) {
switchRingBuffers();
}
@@ -482,12 +482,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
}
currentQueueWorkCount++;
dispatchSemaphoreSection(currentQueueWorkCount, true);
dispatchSemaphoreSection(currentQueueWorkCount);
ringStart = submit(gpuStartVa, startSize);
firstSubmissionAfterRingStart = true;
return ringStart;
}
@@ -497,7 +495,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
return true;
}
if (this->relaxedOrderingEnabled && !firstSubmissionAfterRingStart) {
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingQueueStall();
}
@@ -515,7 +514,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
cpuCachelineFlush(flushPtr, getSizeEnd());
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
this->unblockGpu();
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
@@ -526,13 +525,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
}
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value, bool firstSubmission) {
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
dispatchDisablePrefetcher(true);
if (this->relaxedOrderingEnabled && !firstSubmission) {
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingSchedulerSection(value);
} else {
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
@@ -550,8 +549,8 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool firstSubmission) {
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired) {
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
semaphoreSize += getSizePrefetchMitigation();
@@ -597,7 +596,7 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSwitchRingBuffer
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd(bool relaxedOrderingSchedulerRequired) {
size_t size = Dispatcher::getSizeStopCommandBuffer() +
Dispatcher::getSizeCacheFlush(*hwInfo) +
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
@@ -605,15 +604,15 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
if (disableMonitorFence) {
size += Dispatcher::getSizeMonitorFence(*hwInfo);
}
if (this->relaxedOrderingEnabled) {
if (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) {
size += getSizeDispatchRelaxedOrderingQueueStall();
}
return size;
}
template <typename GfxFamily, typename Dispatcher>
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
size_t size = getSizeSemaphoreSection(false);
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired) {
size_t size = getSizeSemaphoreSection(relaxedOrderingSchedulerRequired);
if (workloadMode == 0) {
size += getSizeStartSection();
if (this->relaxedOrderingEnabled) {
@@ -673,7 +672,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition();
if (this->relaxedOrderingEnabled) {
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer);
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer, batchBuffer.hasRelaxedOrderingDependencies);
} else {
setReturnAddress(returnCmd, returnGpuPointer);
}
@@ -683,7 +682,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
}
// mode 2 does not dispatch any commands
if (this->relaxedOrderingEnabled) {
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
dispatchTaskStoreSection(batchBuffer.taskStartAddress);
}
@@ -698,7 +697,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
}
dispatchSemaphoreSection(currentQueueWorkCount + 1, false);
dispatchSemaphoreSection(currentQueueWorkCount + 1);
return currentPosition;
}
@@ -708,25 +707,31 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
dispatchSemaphoreSection(currentQueueWorkCount, false);
dispatchSemaphoreSection(currentQueueWorkCount);
// patch conditional bb_start with current GPU address
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
CS_GPR_R1, 0, CompareOperation::Equal, false);
relaxedOrderingSchedulerRequired = false;
}
// Returns the number of bytes reserved in the ring for one relaxed-ordering queue stall:
// a semaphore section, one MI_LOAD_REGISTER_IMM and one conditional data-register BB_START.
template <typename GfxFamily, typename Dispatcher>
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
    // The stall is dispatched only while the relaxed-ordering scheduler is required, and the
    // flag is cleared after the semaphore section is emitted, so the semaphore section must be
    // sized in its scheduler form (argument `true`).
    const size_t semaphoreSectionSize = getSizeSemaphoreSection(true);
    const size_t loadRegisterImmSize = sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM);
    const size_t conditionalBbStartSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();

    return semaphoreSectionSize + loadRegisterImmSize + conditionalBbStartSize;
}
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies) {
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
uint64_t returnPtrAfterTaskStoreSection = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
if (hasRelaxedOrderingDependencies) {
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
}
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
@@ -846,24 +851,32 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
this->startRingBuffer();
size_t dispatchSize = getSizeDispatch();
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded);
size_t cycleSize = getSizeSwitchRingBufferSection();
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd();
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(relaxedOrderingSchedulerWillBeNeeded);
if (this->relaxedOrderingEnabled) {
if (batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
requiredMinimalSize += +RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
if (batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
requiredMinimalSize += getSizeDispatchRelaxedOrderingQueueStall();
}
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>() + RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
if (batchBuffer.hasRelaxedOrderingDependencies) {
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
}
}
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
switchRingBuffers();
}
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingQueueStall();
}
this->relaxedOrderingSchedulerRequired |= batchBuffer.hasRelaxedOrderingDependencies;
handleNewResourcesSubmission();
void *currentPosition = dispatchWorkloadSection(batchBuffer);
@@ -890,8 +903,6 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
uint64_t flushValue = updateTagValue();
flushStamp.setStamp(flushValue);
firstSubmissionAfterRingStart = false;
return ringStart;
}

View File

@@ -26,7 +26,8 @@ struct BatchBufferHelper {
stream, // stream
nullptr, // endCmdPtr
false, // useSingleSubdevice
false // hasStallingCmds
false, // hasStallingCmds
false // hasRelaxedOrderingDependencies
);
}

View File

@@ -43,7 +43,8 @@ struct DispatchFlagsHelper {
false, // areMultipleSubDevicesInContext
false, // memoryMigrationRequired
false, // textureCacheFlush
false // hasStallingCmds
false, // hasStallingCmds
false // hasRelaxedOrderingDependencies
);
}
};

View File

@@ -32,6 +32,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::dispatchDisablePrefetcher;
using BaseClass::dispatchPartitionRegisterConfiguration;
using BaseClass::dispatchPrefetchMitigation;
using BaseClass::dispatchRelaxedOrderingReturnPtrRegs;
using BaseClass::dispatchSemaphoreSection;
using BaseClass::dispatchStartSection;
using BaseClass::dispatchSwitchRingBufferSection;
@@ -39,6 +40,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::getDiagnosticModeSection;
using BaseClass::getSizeDisablePrefetcher;
using BaseClass::getSizeDispatch;
using BaseClass::getSizeDispatchRelaxedOrderingQueueStall;
using BaseClass::getSizeEnd;
using BaseClass::getSizePartitionRegisterConfigurationSection;
using BaseClass::getSizePrefetchMitigation;
@@ -47,6 +49,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::getSizeSwitchRingBufferSection;
using BaseClass::getSizeSystemMemoryFenceAddress;
using BaseClass::hwInfo;
using BaseClass::isDisablePrefetcherRequired;
using BaseClass::miMemFenceRequired;
using BaseClass::osContext;
using BaseClass::partitionConfigSet;
@@ -57,6 +60,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::preinitializedTaskStoreSection;
using BaseClass::relaxedOrderingInitialized;
using BaseClass::relaxedOrderingSchedulerAllocation;
using BaseClass::relaxedOrderingSchedulerRequired;
using BaseClass::reserved;
using BaseClass::ringBuffers;
using BaseClass::ringCommandStream;
@@ -98,6 +102,21 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
BaseClass::dispatchStaticRelaxedOrderingScheduler();
}
// Test hook: records the call, then forwards to the base-class implementation.
void dispatchRelaxedOrderingSchedulerSection(uint32_t value) override {
    ++dispatchRelaxedOrderingSchedulerSectionCalled;

    // Qualified call on purpose: invokes the base version instead of re-entering this override.
    BaseClass::dispatchRelaxedOrderingSchedulerSection(value);
}
// Test hook: records the call, then forwards to the base-class implementation.
void dispatchRelaxedOrderingQueueStall() override {
    ++dispatchRelaxedOrderingQueueStallCalled;

    // Qualified call on purpose: invokes the base version instead of re-entering this override.
    BaseClass::dispatchRelaxedOrderingQueueStall();
}
// Test hook: records the call, then forwards the task start address to the base-class implementation.
void dispatchTaskStoreSection(uint64_t taskStartSectionVa) override {
    ++dispatchTaskStoreSectionCalled;

    // Qualified call on purpose: invokes the base version instead of re-entering this override.
    BaseClass::dispatchTaskStoreSection(taskStartSectionVa);
}
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
if (callBaseResident) {
@@ -155,6 +174,9 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
uint32_t disabledDiagnosticCalled = 0u;
uint32_t preinitializeRelaxedOrderingSectionsCalled = 0;
uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0;
uint32_t dispatchRelaxedOrderingSchedulerSectionCalled = 0;
uint32_t dispatchRelaxedOrderingQueueStallCalled = 0;
uint32_t dispatchTaskStoreSectionCalled = 0;
uint32_t makeResourcesResidentVectorSize = 0u;
bool allocateOsResourcesReturn = true;
bool submitReturn = true;

View File

@@ -592,7 +592,9 @@ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenI
aubCsr.initializeTagAllocation();
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
batchBuffer.startOffset = 1;
ResidencyContainer allocationsForResidency;
aubCsr.flush(batchBuffer, allocationsForResidency);

View File

@@ -95,6 +95,6 @@ struct ComputeModeRequirements : public ::testing::Test {
CommandStreamReceiver *csr = nullptr;
std::unique_ptr<MockDevice> device;
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
GraphicsAllocation *alloc = nullptr;
};

View File

@@ -467,7 +467,9 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledTh
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
batchBuffer.startOffset = 1;
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0x1000);
ResidencyContainer allocationsForResidency = {&allocation};
@@ -493,7 +495,8 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverInBatchedModeWhenFl
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
batchBuffer.startOffset = 1;
ResidencyContainer allocationsForResidency;
tbxCsr.flush(batchBuffer, allocationsForResidency);

View File

@@ -380,7 +380,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThen
directSubmission.stopRingBuffer();
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd();
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd(false);
EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize);
EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize);
EXPECT_EQ(oldQueueCount + 1, directSubmission.semaphoreData->QueueWorkCount);
@@ -393,7 +393,7 @@ HWTEST_F(DirectSubmissionTest,
using Dispatcher = RenderDispatcher<FamilyType>;
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd();
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd(false);
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
@@ -405,7 +405,7 @@ HWTEST_F(DirectSubmissionTest,
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
size_t disabledSizeEnd = directSubmission.getSizeEnd();
size_t disabledSizeEnd = directSubmission.getSizeEnd(false);
EXPECT_EQ(disabledSizeEnd, regularSizeEnd + tagUpdateSize);
directSubmission.tagValueSetValue = 0x4343123ull;
@@ -441,7 +441,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExp
bool ret = directSubmission.initialize(false, false);
EXPECT_TRUE(ret);
directSubmission.dispatchSemaphoreSection(1u, false);
directSubmission.dispatchSemaphoreSection(1u);
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(false), directSubmission.ringCommandStream.getUsed());
}
@@ -510,7 +510,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
directSubmission.getSizeSemaphoreSection(false);
size_t actualSize = directSubmission.getSizeDispatch();
size_t actualSize = directSubmission.getSizeDispatch(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -526,7 +526,7 @@ HWTEST_F(DirectSubmissionTest,
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
directSubmission.getSizeSemaphoreSection(false);
size_t actualSize = directSubmission.getSizeDispatch();
size_t actualSize = directSubmission.getSizeDispatch(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -541,7 +541,7 @@ HWTEST_F(DirectSubmissionTest,
size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
directSubmission.getSizeSemaphoreSection(false);
size_t actualSize = directSubmission.getSizeDispatch();
size_t actualSize = directSubmission.getSizeDispatch(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -554,7 +554,7 @@ HWTEST_F(DirectSubmissionTest,
size_t expectedSize = directSubmission.getSizeStartSection() +
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
directSubmission.getSizeSemaphoreSection(false);
size_t actualSize = directSubmission.getSizeDispatch();
size_t actualSize = directSubmission.getSizeDispatch(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -568,7 +568,7 @@ HWTEST_F(DirectSubmissionTest,
size_t expectedSize = directSubmission.getSizeStartSection() +
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
directSubmission.getSizeSemaphoreSection(false);
size_t actualSize = directSubmission.getSizeDispatch();
size_t actualSize = directSubmission.getSizeDispatch(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -581,7 +581,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetEndSizeThenExpectCorr
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
MemoryConstants::cacheLineSize;
size_t actualSize = directSubmission.getSizeEnd();
size_t actualSize = directSubmission.getSizeEnd(false);
EXPECT_EQ(expectedSize, actualSize);
}
@@ -872,7 +872,7 @@ HWTEST_F(DirectSubmissionTest,
size_t expectedSize = Dispatcher::getSizePreemption() +
directSubmission.getSizeSemaphoreSection(false) +
directSubmission.getDiagnosticModeSection();
expectedSize += expectedExecCount * directSubmission.getSizeDispatch();
expectedSize += expectedExecCount * directSubmission.getSizeDispatch(false);
if (directSubmission.miMemFenceRequired) {
expectedSize += directSubmission.getSizeSystemMemoryFenceAddress();
@@ -969,7 +969,7 @@ HWTEST_F(DirectSubmissionTest,
size_t expectedSize = Dispatcher::getSizePreemption() +
directSubmission.getSizeSemaphoreSection(false);
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection(false);
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch());
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch(false));
expectedSize += expectedExecCount * expectedDispatch;
if (directSubmission.miMemFenceRequired) {

View File

@@ -224,7 +224,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
EXPECT_EQ(1u, directSubmission.submitCount);
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
EXPECT_TRUE(directSubmission.ringStart);
HardwareParse hwParse;
@@ -257,7 +257,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
directSubmission.disableMonitorFence = true;
@@ -267,7 +267,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - tagUpdateSize));
directSubmission.tagValueSetValue = 0x4343123ull;
@@ -306,7 +306,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
@@ -316,7 +316,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
size_t flushSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo);
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));
directSubmission.dispatchWorkloadSection(batchBuffer);
@@ -354,7 +354,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
@@ -365,7 +365,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
size_t startSize = directSubmission.getSizeStartSection();
size_t storeDataSize = Dispatcher::getSizeStoreDwordCommand();
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize + storeDataSize));
directSubmission.workloadModeOneExpectedValue = 0x40u;
@@ -398,7 +398,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
@@ -408,7 +408,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
size_t startSize = directSubmission.getSizeStartSection();
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize));
directSubmission.currentQueueWorkCount = 0x40u;
@@ -463,7 +463,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_EQ(1u, directSubmission.submitCount);
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
EXPECT_TRUE(directSubmission.ringStart);
HardwareParse hwParse;
@@ -502,7 +502,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress);
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
EXPECT_TRUE(directSubmission.ringStart);
@@ -542,7 +542,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_EQ(1u, directSubmission.submitCount);
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
EXPECT_TRUE(directSubmission.ringStart);
}
@@ -576,7 +576,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
EXPECT_EQ(submitSize, directSubmission.submitSize);
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
EXPECT_TRUE(directSubmission.ringStart);
@@ -949,6 +949,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
bool verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate);
DebugManagerStateRestore restore;
FlushStampTracker flushStamp{true};
};
template <typename FamilyType>
@@ -1668,7 +1669,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat
directSubmission.startRingBuffer();
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
FlushStampTracker flushStamp(true);
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
}
@@ -1773,7 +1773,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
directSubmission.initialize(true, false);
auto offset = directSubmission.ringCommandStream.getUsed() + directSubmission.getSizeStartSection() + RelaxedOrderingHelper::getSizeReturnPtrRegs<FamilyType>();
FlushStampTracker flushStamp(true);
batchBuffer.hasRelaxedOrderingDependencies = true;
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
auto taskStoreSection = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
@@ -1840,13 +1840,12 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, givenNotEnoughSpaceForTaskStoreSe
directSubmission.ringCommandStream.getUsed();
auto sizeToConsume = directSubmission.ringCommandStream.getAvailableSpace() -
(directSubmission.getSizeDispatch() + directSubmission.getSizeEnd() + directSubmission.getSizeSwitchRingBufferSection());
(directSubmission.getSizeDispatch(false) + directSubmission.getSizeEnd(false) + directSubmission.getSizeSwitchRingBufferSection());
directSubmission.ringCommandStream.getSpace(sizeToConsume);
auto oldAllocation = directSubmission.ringCommandStream.getGraphicsAllocation();
FlushStampTracker flushStamp(true);
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
EXPECT_NE(oldAllocation, directSubmission.ringCommandStream.getGraphicsAllocation());
@@ -1867,7 +1866,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchS
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
FlushStampTracker flushStamp(true);
batchBuffer.hasRelaxedOrderingDependencies = true;
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
@@ -1896,8 +1895,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
FlushStampTracker flushStamp(true);
batchBuffer.hasStallingCmds = false;
batchBuffer.hasRelaxedOrderingDependencies = true;
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
@@ -1909,7 +1908,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
@@ -1958,7 +1957,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
FlushStampTracker flushStamp(true);
batchBuffer.hasStallingCmds = true;
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
@@ -1999,8 +1997,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
FlushStampTracker flushStamp(true);
batchBuffer.hasStallingCmds = false;
batchBuffer.hasRelaxedOrderingDependencies = true;
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
@@ -2011,7 +2009,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
directSubmission.stopRingBuffer();
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
@@ -2122,6 +2120,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
auto originalBbStart = *reinterpret_cast<MI_BATCH_BUFFER_START *>(batchBuffer.endCmdPtr);
batchBuffer.hasRelaxedOrderingDependencies = true;
directSubmission.dispatchWorkloadSection(batchBuffer);
uint64_t returnPtr = directSubmission.ringCommandStream.getGpuBase() + offset + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + directSubmission.getSizeStartSection();
@@ -2136,3 +2135,411 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
EXPECT_EQ(0, memcmp(&originalBbStart, batchBuffer.endCmdPtr, sizeof(MI_BATCH_BUFFER_START)));
}
// First submission: stalling commands WITH relaxed ordering dependencies.
// Each scope below starts from a freshly initialized direct submission, repeats that first
// submission and then checks the mock's scheduler / queue-stall / task-store call counters
// after a different follow-up action.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using SubmissionMock = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both batch buffer flags and pushes the batch buffer through the given submission.
    auto submit = [&](SubmissionMock &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    { // follow-up: non-stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        // Nothing has been programmed before the first submission.
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, true, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, false, true);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, true);
        submit(submission, true, true);

        EXPECT_EQ(3u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, true);
        submit(submission, true, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, true);
        submit(submission, false, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stopping the ring buffer
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, true);
        submission.stopRingBuffer();

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }
}
// First submission: non-stalling commands WITH relaxed ordering dependencies.
// Each scope below starts from a freshly initialized direct submission, repeats that first
// submission and then checks the mock's scheduler / queue-stall / task-store call counters
// after a different follow-up action. The four follow-up submissions cover every combination
// of the hasStallingCmds / hasRelaxedOrderingDependencies flags exactly once.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using SubmissionMock = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both batch buffer flags and pushes the batch buffer through the given submission.
    auto submit = [&](SubmissionMock &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    { // follow-up: stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        // Nothing has been programmed before the first submission.
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, false, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, true, true);

        EXPECT_EQ(3u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, true);
        submit(submission, false, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, true);
        submit(submission, true, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, true);
        submit(submission, false, true);

        // NOTE(review): expected counts match the same follow-up issued after a stalling first
        // submission with dependencies (2 / 0 / 2 in the sibling test) - confirm on hardware-family run.
        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stopping the ring buffer
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, true);
        submission.stopRingBuffer();

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }
}
// First submission: stalling commands WITHOUT relaxed ordering dependencies.
// Each scope below starts from a freshly initialized direct submission, repeats that first
// submission and then checks the mock's scheduler / queue-stall / task-store call counters
// after a different follow-up action.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using SubmissionMock = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both batch buffer flags and pushes the batch buffer through the given submission.
    auto submit = [&](SubmissionMock &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    { // follow-up: stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        // Nothing has been programmed before the first submission.
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, true, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, false);
        submit(submission, true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, false);
        submit(submission, false, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, false);
        submit(submission, false, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stopping the ring buffer
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, true, false);
        submission.stopRingBuffer();

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }
}
// First submission: non-stalling commands WITHOUT relaxed ordering dependencies.
// Each scope below starts from a freshly initialized direct submission, repeats that first
// submission and then checks the mock's scheduler / queue-stall / task-store call counters
// after a different follow-up action.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using SubmissionMock = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both batch buffer flags and pushes the batch buffer through the given submission.
    auto submit = [&](SubmissionMock &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    { // follow-up: stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        // Nothing has been programmed before the first submission.
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, false, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submit(submission, true, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, false);
        submit(submission, false, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: non-stalling, with dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, false);
        submit(submission, false, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stalling, without dependencies
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, false);
        submit(submission, true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    { // follow-up: stopping the ring buffer
        SubmissionMock submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submit(submission, false, false);
        submission.stopRingBuffer();

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }
}
// Checks getSizeSemaphoreSection() and getSizeEnd() for both values of their boolean argument:
// each result must equal a common base size plus the part that differs between true and false.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenRelaxedOrderingSchedulerRequiredWhenAskingForCmdsSizeThenReturnCorrectValue, IsAtLeastXeHpcCore) {
    using Dispatcher = RenderDispatcher<FamilyType>;

    MockDirectSubmissionHw<FamilyType, Dispatcher> submission(*pDevice->getDefaultEngine().commandStreamReceiver);

    // Semaphore section: part shared by both variants.
    size_t semaphoreBaseSize = submission.getSizePrefetchMitigation();
    semaphoreBaseSize += submission.isDisablePrefetcherRequired ? 2 * submission.getSizeDisablePrefetcher() : 0u;
    semaphoreBaseSize += submission.miMemFenceRequired
                             ? MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(pDevice->getHardwareInfo())
                             : 0u;

    // With the flag set the dynamic scheduler section is added, otherwise a single semaphore wait.
    EXPECT_EQ(semaphoreBaseSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::totalSize, submission.getSizeSemaphoreSection(true));
    EXPECT_EQ(semaphoreBaseSize + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait(), submission.getSizeSemaphoreSection(false));

    // End section: part shared by both variants.
    size_t endBaseSize = Dispatcher::getSizeStopCommandBuffer() +
                         Dispatcher::getSizeCacheFlush(pDevice->getHardwareInfo()) +
                         (Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
                         MemoryConstants::cacheLineSize;
    endBaseSize += submission.disableMonitorFence ? Dispatcher::getSizeMonitorFence(pDevice->getHardwareInfo()) : 0u;

    // With the flag set the queue stall size is added on top of the base end size.
    EXPECT_EQ(endBaseSize + submission.getSizeDispatchRelaxedOrderingQueueStall(), submission.getSizeEnd(true));
    EXPECT_EQ(endBaseSize, submission.getSizeEnd(false));
}
// Checks the four MI_LOAD_REGISTER_IMM commands written by dispatchRelaxedOrderingReturnPtrRegs():
// CS_GPR_R4 always receives the return pointer, while CS_GPR_R3 receives the return pointer
// advanced by the task store section size only when the last argument is true.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenSchedulerRequiredWhenDispatchingReturnPtrsThenAddOffset, IsAtLeastXeHpcCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Split a 64-bit address into the two 32-bit values loaded into a register pair.
    const auto lowDword = [](uint64_t address) { return static_cast<uint32_t>(address & 0xFFFF'FFFFULL); };
    const auto highDword = [](uint64_t address) { return static_cast<uint32_t>(address >> 32); };

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> submission(*pDevice->getDefaultEngine().commandStreamReceiver);
    submission.initialize(true, false);

    const uint64_t basePtr = 0x800100123000;
    const uint64_t shiftedPtr = basePtr + RelaxedOrderingHelper::getSizeTaskStoreSection<FamilyType>();

    // Variant with the flag set: R3 is expected to hold the shifted pointer.
    size_t streamOffset = submission.ringCommandStream.getUsed();
    submission.dispatchRelaxedOrderingReturnPtrRegs(submission.ringCommandStream, basePtr, true);

    auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(submission.ringCommandStream.getCpuBase(), streamOffset));
    EXPECT_TRUE(verifyLri<FamilyType>(lri, CS_GPR_R4, lowDword(basePtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R4 + 4, highDword(basePtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R3, lowDword(shiftedPtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R3 + 4, highDword(shiftedPtr)));

    // Variant with the flag cleared: R3 is expected to hold the unmodified pointer.
    streamOffset = submission.ringCommandStream.getUsed();
    submission.dispatchRelaxedOrderingReturnPtrRegs(submission.ringCommandStream, basePtr, false);

    lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(submission.ringCommandStream.getCpuBase(), streamOffset));
    EXPECT_TRUE(verifyLri<FamilyType>(lri, CS_GPR_R4, lowDword(basePtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R4 + 4, highDword(basePtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R3, lowDword(basePtr)));
    EXPECT_TRUE(verifyLri<FamilyType>(++lri, CS_GPR_R3 + 4, highDword(basePtr)));
}

View File

@@ -288,7 +288,8 @@ TEST_F(WddmCommandStreamTest, GivenOffsetWhenFlushingThenFlushIsSubmittedCorrect
ASSERT_NE(nullptr, commandBuffer);
LinearStream cs(commandBuffer);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
batchBuffer.startOffset = offset;
csr->flush(batchBuffer, csr->getResidencyAllocations());
EXPECT_EQ(1u, wddm->submitResult.called);
EXPECT_TRUE(wddm->submitResult.success);
@@ -1165,7 +1166,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
Dispatcher::getSizePreemption() +
directSubmission->getSizeDispatch();
directSubmission->getSizeDispatch(false);
if (directSubmission->miMemFenceRequired) {
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
@@ -1206,7 +1207,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
Dispatcher::getSizePreemption() +
directSubmission->getSizeDispatch();
directSubmission->getSizeDispatch(false);
if (directSubmission->miMemFenceRequired) {
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();