mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
RelaxedOrdering: Improve dependencies tracking
Avoid unnecessary scheduler programming. Related-To: NEO-7458. Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ad6237478f
commit
3f962bf3e8
@@ -124,12 +124,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *waitEventHandles) override;
|
||||
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds);
|
||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||
|
||||
void checkAvailableSpace();
|
||||
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
||||
|
||||
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent);
|
||||
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent);
|
||||
|
||||
void createLogicalStateHelper() override {}
|
||||
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
|
||||
|
||||
@@ -69,7 +69,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) {
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||
NEO::DispatchFlags dispatchFlags(
|
||||
{}, // csrDependencies
|
||||
nullptr, // barrierTimestampPacketNodes
|
||||
@@ -100,7 +100,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
||||
this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext
|
||||
false, // memoryMigrationRequired
|
||||
false, // textureCacheFlush
|
||||
hasStallingCmds // hasStallingCmds
|
||||
hasStallingCmds, // hasStallingCmds
|
||||
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
||||
);
|
||||
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
|
||||
|
||||
@@ -260,7 +261,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents,
|
||||
launchParams);
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -273,7 +274,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -289,7 +290,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -323,7 +324,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
}
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -366,7 +367,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
}
|
||||
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -381,7 +382,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -393,7 +394,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
|
||||
checkAvailableSpace();
|
||||
}
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
|
||||
return flushImmediate(ret, true, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -405,7 +406,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
||||
checkAvailableSpace();
|
||||
}
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
|
||||
return flushImmediate(ret, true, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, false, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -431,7 +432,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
||||
} else {
|
||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
||||
}
|
||||
return flushImmediate(ret, false, false, nullptr);
|
||||
return flushImmediate(ret, false, false, false, nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -448,7 +449,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents);
|
||||
this->dependenciesPresent = true;
|
||||
return flushImmediate(ret, true, true, nullptr);
|
||||
return flushImmediate(ret, true, true, (numEvents > 0), nullptr);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -461,7 +462,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
|
||||
return flushImmediate(ret, true, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -497,7 +498,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -515,7 +516,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -533,7 +534,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
|
||||
numWaitEvents, phWaitEvents);
|
||||
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -547,7 +548,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
|
||||
checkAvailableSpace();
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||
return flushImmediate(ret, true, true, hSignalEvent);
|
||||
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
@@ -560,14 +561,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
||||
checkAvailableSpace();
|
||||
}
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles);
|
||||
return flushImmediate(ret, true, false, hSignalEvent);
|
||||
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent) {
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||
if (this->isFlushTaskSubmissionEnabled) {
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds);
|
||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
inputRet = executeCommandListImmediate(performMigration);
|
||||
}
|
||||
|
||||
@@ -99,7 +99,7 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
|
||||
|
||||
NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
|
||||
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false);
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false, false);
|
||||
|
||||
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
||||
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
||||
|
||||
@@ -515,7 +515,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
|
||||
return executeCommandListImmediateReturnValue;
|
||||
}
|
||||
|
||||
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) override {
|
||||
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) override {
|
||||
++executeCommandListImmediateWithFlushTaskCalledCount;
|
||||
return executeCommandListImmediateWithFlushTaskReturnValue;
|
||||
}
|
||||
|
||||
@@ -1068,6 +1068,118 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
||||
driverHandle->releaseImportedPointer(dstPtr);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
|
||||
ze_command_queue_desc_t desc = {};
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
ASSERT_NE(nullptr, commandList);
|
||||
|
||||
ze_event_pool_desc_t eventPoolDesc = {};
|
||||
eventPoolDesc.count = 1;
|
||||
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||
|
||||
ze_event_desc_t eventDesc = {};
|
||||
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||
|
||||
ze_event_handle_t event = nullptr;
|
||||
|
||||
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
|
||||
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
|
||||
|
||||
Mock<::L0::Kernel> kernel;
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
uint8_t srcPtr[64] = {};
|
||||
uint8_t dstPtr[64] = {};
|
||||
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
|
||||
|
||||
driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
|
||||
|
||||
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList->csr);
|
||||
ultCsr->recordFlusheBatchBuffer = true;
|
||||
|
||||
auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
EXPECT_EQ(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies, dispatchFlag);
|
||||
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies, bbFlag);
|
||||
};
|
||||
|
||||
for (bool hasEventDependencies : {true, false}) {
|
||||
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
|
||||
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
|
||||
|
||||
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, launchParams),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendBarrier(nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendEventReset(event), false, false);
|
||||
|
||||
verifyFlags(commandList->appendSignalEvent(event), false, false);
|
||||
|
||||
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
|
||||
false, false);
|
||||
|
||||
verifyFlags(commandList->appendWaitOnEvents(1, &event), true, true);
|
||||
|
||||
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
if constexpr (FamilyType::supportsSampler) {
|
||||
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
|
||||
auto mockBuiltinKernel = static_cast<Mock<::L0::Kernel> *>(kernel);
|
||||
mockBuiltinKernel->setArgRedescribedImageCallBase = false;
|
||||
|
||||
auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
|
||||
ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
|
||||
ze_image_desc_t zeDesc = {};
|
||||
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||
image->initialize(device, &zeDesc);
|
||||
|
||||
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
|
||||
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
}
|
||||
|
||||
size_t rangeSizes = 1;
|
||||
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
|
||||
verifyFlags(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
}
|
||||
|
||||
for (bool hasEventDependencies : {true, false}) {
|
||||
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
|
||||
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
|
||||
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
|
||||
hasEventDependencies, hasEventDependencies);
|
||||
}
|
||||
|
||||
driverHandle->releaseImportedPointer(dstPtr);
|
||||
}
|
||||
|
||||
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
|
||||
DebugManagerStateRestore restorer;
|
||||
DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
||||
|
||||
@@ -73,7 +73,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||
|
||||
NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {};
|
||||
hwInfoConfig.fillScmPropertiesSupportStructure(scmPropertiesSupport);
|
||||
@@ -102,7 +102,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
|
||||
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||
|
||||
expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1;
|
||||
expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? 0 : -1;
|
||||
@@ -128,7 +128,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
||||
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
||||
|
||||
commandListImmediate.containsAnyKernel = true;
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
|
||||
}
|
||||
|
||||
@@ -139,7 +139,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
||||
commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
||||
@@ -151,7 +151,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecu
|
||||
|
||||
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
||||
@@ -163,7 +163,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExe
|
||||
|
||||
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||
}
|
||||
|
||||
using CommandListTest = Test<DeviceFixture>;
|
||||
@@ -351,7 +351,7 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
|
||||
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
|
||||
cmdList.csr = event->csr;
|
||||
event->csr = &mockCommandStreamReceiver;
|
||||
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, event->toHandle());
|
||||
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, event->toHandle());
|
||||
EXPECT_EQ(event->csr, cmdList.csr);
|
||||
}
|
||||
|
||||
@@ -772,7 +772,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandLis
|
||||
auto oldCommandQueue = commandList->cmdQImmediate;
|
||||
commandList->cmdQImmediate = &mockCommandQueue;
|
||||
commandListImmediate.indirectAllocationsAllowed = true;
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
|
||||
commandList->cmdQImmediate = oldCommandQueue;
|
||||
}
|
||||
@@ -791,7 +791,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL
|
||||
auto oldCommandQueue = commandList->cmdQImmediate;
|
||||
commandList->cmdQImmediate = &mockCommandQueue;
|
||||
commandListImmediate.indirectAllocationsAllowed = false;
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
|
||||
commandList->cmdQImmediate = oldCommandQueue;
|
||||
}
|
||||
|
||||
@@ -804,7 +804,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
||||
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
||||
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
||||
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
|
||||
false); // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
||||
@@ -1050,7 +1051,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
||||
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
||||
false, // memoryMigrationRequired
|
||||
false, // textureCacheFlush
|
||||
false); // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||
|
||||
|
||||
@@ -80,7 +80,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
false, // areMultipleSubDevicesInContext
|
||||
false, // memoryMigrationRequired
|
||||
false, // textureCacheFlush
|
||||
false); // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
||||
|
||||
@@ -210,7 +211,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
||||
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
||||
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
||||
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
|
||||
false); // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
@@ -385,7 +387,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
||||
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
||||
false, // memoryMigrationRequired
|
||||
false, // textureCacheFlush
|
||||
false); // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false); // hasRelaxedOrderingDependencies
|
||||
|
||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||
|
||||
@@ -615,7 +615,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
|
||||
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
|
||||
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
|
||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds)};
|
||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds),
|
||||
dispatchFlags.hasRelaxedOrderingDependencies};
|
||||
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||
|
||||
@@ -1178,7 +1179,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
|
||||
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
|
||||
|
||||
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false};
|
||||
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false, false};
|
||||
|
||||
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
|
||||
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
|
||||
@@ -1290,7 +1291,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
|
||||
|
||||
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
|
||||
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true};
|
||||
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true, false};
|
||||
|
||||
this->latestSentTaskCount = taskCount + 1;
|
||||
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
|
||||
|
||||
@@ -57,7 +57,7 @@ struct DispatchFlags {
|
||||
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
||||
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
|
||||
bool hasStallingCmds) : csrDependencies(csrDependenciesP),
|
||||
bool hasStallingCmds, bool hasRelaxedOrderingDependencies) : csrDependencies(csrDependenciesP),
|
||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||
pipelineSelectArgs(pipelineSelectArgsP),
|
||||
flushStampReference(flushStampReferenceP),
|
||||
@@ -86,7 +86,8 @@ struct DispatchFlags {
|
||||
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
||||
memoryMigrationRequired(memoryMigrationRequiredP),
|
||||
textureCacheFlush(textureCacheFlush),
|
||||
hasStallingCmds(hasStallingCmds){};
|
||||
hasStallingCmds(hasStallingCmds),
|
||||
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies){};
|
||||
|
||||
CsrDependencies csrDependencies;
|
||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||
@@ -119,6 +120,7 @@ struct DispatchFlags {
|
||||
bool memoryMigrationRequired = false;
|
||||
bool textureCacheFlush = false;
|
||||
bool hasStallingCmds = false;
|
||||
bool hasRelaxedOrderingDependencies = false;
|
||||
bool disableEUFusion = false;
|
||||
};
|
||||
|
||||
|
||||
@@ -100,14 +100,15 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
|
||||
|
||||
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset,
|
||||
size_t chainedBatchBufferStartOffset, uint64_t taskStartAddress, GraphicsAllocation *chainedBatchBuffer,
|
||||
bool requiresCoherency, bool lowPriority,
|
||||
QueueThrottle throttle, uint64_t sliceCount,
|
||||
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds)
|
||||
bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount,
|
||||
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies)
|
||||
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
|
||||
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
|
||||
requiresCoherency(requiresCoherency), low_priority(lowPriority),
|
||||
throttle(throttle), sliceCount(sliceCount),
|
||||
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds) {}
|
||||
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds),
|
||||
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies) {}
|
||||
|
||||
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
|
||||
flushStamp.reset(new FlushStampTracker(false));
|
||||
|
||||
@@ -32,7 +32,8 @@ struct BatchBuffer {
|
||||
LinearStream *stream,
|
||||
void *endCmdPtr,
|
||||
bool useSingleSubdevice,
|
||||
bool hasStallingCmds);
|
||||
bool hasStallingCmds,
|
||||
bool hasRelaxedOrderingDependencies);
|
||||
BatchBuffer() {}
|
||||
GraphicsAllocation *commandBufferAllocation = nullptr;
|
||||
size_t startOffset = 0u;
|
||||
@@ -52,6 +53,7 @@ struct BatchBuffer {
|
||||
|
||||
bool useSingleSubdevice = false;
|
||||
bool hasStallingCmds = false;
|
||||
bool hasRelaxedOrderingDependencies = false;
|
||||
bool ringBufferRestartRequest = false;
|
||||
};
|
||||
|
||||
|
||||
@@ -114,12 +114,12 @@ class DirectSubmissionHw {
|
||||
|
||||
void cpuCachelineFlush(void *ptr, size_t size);
|
||||
|
||||
void dispatchSemaphoreSection(uint32_t value, bool firstSubmission);
|
||||
size_t getSizeSemaphoreSection(bool firstSubmission);
|
||||
void dispatchSemaphoreSection(uint32_t value);
|
||||
size_t getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired);
|
||||
|
||||
void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
|
||||
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
|
||||
|
||||
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr);
|
||||
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies);
|
||||
|
||||
void dispatchStartSection(uint64_t gpuStartAddress);
|
||||
size_t getSizeStartSection();
|
||||
@@ -127,10 +127,10 @@ class DirectSubmissionHw {
|
||||
void dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress);
|
||||
size_t getSizeSwitchRingBufferSection();
|
||||
|
||||
void dispatchRelaxedOrderingQueueStall();
|
||||
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingQueueStall();
|
||||
size_t getSizeDispatchRelaxedOrderingQueueStall();
|
||||
|
||||
void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
||||
MOCKABLE_VIRTUAL void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
||||
MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections();
|
||||
|
||||
void initRelaxedOrderingRegisters();
|
||||
@@ -138,7 +138,7 @@ class DirectSubmissionHw {
|
||||
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
|
||||
|
||||
void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
|
||||
size_t getSizeDispatch();
|
||||
size_t getSizeDispatch(bool relaxedOrderingSchedulerRequired);
|
||||
|
||||
void dispatchPrefetchMitigation();
|
||||
size_t getSizePrefetchMitigation();
|
||||
@@ -148,7 +148,7 @@ class DirectSubmissionHw {
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler();
|
||||
|
||||
size_t getSizeEnd();
|
||||
size_t getSizeEnd(bool relaxedOrderingSchedulerRequired);
|
||||
|
||||
void dispatchPartitionRegisterConfiguration();
|
||||
size_t getSizePartitionRegisterConfigurationSection();
|
||||
@@ -226,6 +226,6 @@ class DirectSubmissionHw {
|
||||
bool dcFlushRequired = false;
|
||||
bool relaxedOrderingEnabled = false;
|
||||
bool relaxedOrderingInitialized = false;
|
||||
bool firstSubmissionAfterRingStart = true;
|
||||
bool relaxedOrderingSchedulerRequired = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -403,7 +403,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
||||
initDiagnostic(submitOnInit);
|
||||
if (ret && submitOnInit) {
|
||||
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
||||
getSizeSemaphoreSection(true);
|
||||
getSizeSemaphoreSection(false);
|
||||
|
||||
Dispatcher::dispatchPreemption(ringCommandStream);
|
||||
if (this->partitionedMode) {
|
||||
@@ -431,7 +431,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
||||
dispatchDiagnosticModeSection();
|
||||
startBufferSize += getDiagnosticModeSection();
|
||||
}
|
||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
|
||||
performDiagnosticMode();
|
||||
@@ -446,7 +446,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t startSize = getSizeSemaphoreSection(true);
|
||||
size_t startSize = getSizeSemaphoreSection(false);
|
||||
if (!this->partitionConfigSet) {
|
||||
startSize += getSizePartitionRegisterConfigurationSection();
|
||||
}
|
||||
@@ -457,7 +457,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
||||
}
|
||||
|
||||
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
|
||||
size_t requiredSize = startSize + getSizeDispatch(false) + getSizeEnd(false);
|
||||
if (ringCommandStream.getAvailableSpace() < requiredSize) {
|
||||
switchRingBuffers();
|
||||
}
|
||||
@@ -482,12 +482,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
}
|
||||
|
||||
currentQueueWorkCount++;
|
||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
ringStart = submit(gpuStartVa, startSize);
|
||||
|
||||
firstSubmissionAfterRingStart = true;
|
||||
|
||||
return ringStart;
|
||||
}
|
||||
|
||||
@@ -497,7 +495,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (this->relaxedOrderingEnabled && !firstSubmissionAfterRingStart) {
|
||||
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
|
||||
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
|
||||
dispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
|
||||
@@ -515,7 +514,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
||||
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
|
||||
|
||||
cpuCachelineFlush(flushPtr, getSizeEnd());
|
||||
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
|
||||
this->unblockGpu();
|
||||
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
||||
|
||||
@@ -526,13 +525,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value, bool firstSubmission) {
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
dispatchDisablePrefetcher(true);
|
||||
|
||||
if (this->relaxedOrderingEnabled && !firstSubmission) {
|
||||
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
|
||||
dispatchRelaxedOrderingSchedulerSection(value);
|
||||
} else {
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||
@@ -550,8 +549,8 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool firstSubmission) {
|
||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired) {
|
||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||
semaphoreSize += getSizePrefetchMitigation();
|
||||
|
||||
@@ -597,7 +596,7 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSwitchRingBuffer
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd(bool relaxedOrderingSchedulerRequired) {
|
||||
size_t size = Dispatcher::getSizeStopCommandBuffer() +
|
||||
Dispatcher::getSizeCacheFlush(*hwInfo) +
|
||||
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
||||
@@ -605,15 +604,15 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
|
||||
if (disableMonitorFence) {
|
||||
size += Dispatcher::getSizeMonitorFence(*hwInfo);
|
||||
}
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
if (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) {
|
||||
size += getSizeDispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
|
||||
size_t size = getSizeSemaphoreSection(false);
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired) {
|
||||
size_t size = getSizeSemaphoreSection(relaxedOrderingSchedulerRequired);
|
||||
if (workloadMode == 0) {
|
||||
size += getSizeStartSection();
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
@@ -673,7 +672,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition();
|
||||
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer);
|
||||
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer, batchBuffer.hasRelaxedOrderingDependencies);
|
||||
} else {
|
||||
setReturnAddress(returnCmd, returnGpuPointer);
|
||||
}
|
||||
@@ -683,7 +682,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
}
|
||||
// mode 2 does not dispatch any commands
|
||||
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
|
||||
dispatchTaskStoreSection(batchBuffer.taskStartAddress);
|
||||
}
|
||||
|
||||
@@ -698,7 +697,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
|
||||
}
|
||||
|
||||
dispatchSemaphoreSection(currentQueueWorkCount + 1, false);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount + 1);
|
||||
return currentPosition;
|
||||
}
|
||||
|
||||
@@ -708,25 +707,31 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount, false);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
// patch conditional bb_start with current GPU address
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
|
||||
relaxedOrderingSchedulerRequired = false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
|
||||
return getSizeSemaphoreSection(false) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
|
||||
return getSizeSemaphoreSection(true) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies) {
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
||||
|
||||
uint64_t returnPtrAfterTaskStoreSection = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
|
||||
|
||||
if (hasRelaxedOrderingDependencies) {
|
||||
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||
}
|
||||
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
||||
@@ -846,24 +851,32 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
|
||||
this->startRingBuffer();
|
||||
|
||||
size_t dispatchSize = getSizeDispatch();
|
||||
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
|
||||
|
||||
size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded);
|
||||
size_t cycleSize = getSizeSwitchRingBufferSection();
|
||||
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd();
|
||||
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(relaxedOrderingSchedulerWillBeNeeded);
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
if (batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
|
||||
requiredMinimalSize += +RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
|
||||
|
||||
if (batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
|
||||
requiredMinimalSize += getSizeDispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>() + RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
|
||||
if (batchBuffer.hasRelaxedOrderingDependencies) {
|
||||
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||
}
|
||||
}
|
||||
|
||||
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
|
||||
switchRingBuffers();
|
||||
}
|
||||
|
||||
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
|
||||
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
|
||||
dispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
|
||||
this->relaxedOrderingSchedulerRequired |= batchBuffer.hasRelaxedOrderingDependencies;
|
||||
|
||||
handleNewResourcesSubmission();
|
||||
|
||||
void *currentPosition = dispatchWorkloadSection(batchBuffer);
|
||||
@@ -890,8 +903,6 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
uint64_t flushValue = updateTagValue();
|
||||
flushStamp.setStamp(flushValue);
|
||||
|
||||
firstSubmissionAfterRingStart = false;
|
||||
|
||||
return ringStart;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,8 @@ struct BatchBufferHelper {
|
||||
stream, // stream
|
||||
nullptr, // endCmdPtr
|
||||
false, // useSingleSubdevice
|
||||
false // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false // hasRelaxedOrderingDependencies
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -43,7 +43,8 @@ struct DispatchFlagsHelper {
|
||||
false, // areMultipleSubDevicesInContext
|
||||
false, // memoryMigrationRequired
|
||||
false, // textureCacheFlush
|
||||
false // hasStallingCmds
|
||||
false, // hasStallingCmds
|
||||
false // hasRelaxedOrderingDependencies
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -32,6 +32,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::dispatchDisablePrefetcher;
|
||||
using BaseClass::dispatchPartitionRegisterConfiguration;
|
||||
using BaseClass::dispatchPrefetchMitigation;
|
||||
using BaseClass::dispatchRelaxedOrderingReturnPtrRegs;
|
||||
using BaseClass::dispatchSemaphoreSection;
|
||||
using BaseClass::dispatchStartSection;
|
||||
using BaseClass::dispatchSwitchRingBufferSection;
|
||||
@@ -39,6 +40,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::getDiagnosticModeSection;
|
||||
using BaseClass::getSizeDisablePrefetcher;
|
||||
using BaseClass::getSizeDispatch;
|
||||
using BaseClass::getSizeDispatchRelaxedOrderingQueueStall;
|
||||
using BaseClass::getSizeEnd;
|
||||
using BaseClass::getSizePartitionRegisterConfigurationSection;
|
||||
using BaseClass::getSizePrefetchMitigation;
|
||||
@@ -47,6 +49,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::getSizeSwitchRingBufferSection;
|
||||
using BaseClass::getSizeSystemMemoryFenceAddress;
|
||||
using BaseClass::hwInfo;
|
||||
using BaseClass::isDisablePrefetcherRequired;
|
||||
using BaseClass::miMemFenceRequired;
|
||||
using BaseClass::osContext;
|
||||
using BaseClass::partitionConfigSet;
|
||||
@@ -57,6 +60,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::preinitializedTaskStoreSection;
|
||||
using BaseClass::relaxedOrderingInitialized;
|
||||
using BaseClass::relaxedOrderingSchedulerAllocation;
|
||||
using BaseClass::relaxedOrderingSchedulerRequired;
|
||||
using BaseClass::reserved;
|
||||
using BaseClass::ringBuffers;
|
||||
using BaseClass::ringCommandStream;
|
||||
@@ -98,6 +102,21 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
BaseClass::dispatchStaticRelaxedOrderingScheduler();
|
||||
}
|
||||
|
||||
void dispatchRelaxedOrderingSchedulerSection(uint32_t value) override {
|
||||
dispatchRelaxedOrderingSchedulerSectionCalled++;
|
||||
BaseClass::dispatchRelaxedOrderingSchedulerSection(value);
|
||||
}
|
||||
|
||||
void dispatchRelaxedOrderingQueueStall() override {
|
||||
dispatchRelaxedOrderingQueueStallCalled++;
|
||||
BaseClass::dispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
|
||||
void dispatchTaskStoreSection(uint64_t taskStartSectionVa) override {
|
||||
dispatchTaskStoreSectionCalled++;
|
||||
BaseClass::dispatchTaskStoreSection(taskStartSectionVa);
|
||||
}
|
||||
|
||||
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
|
||||
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
|
||||
if (callBaseResident) {
|
||||
@@ -155,6 +174,9 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
uint32_t disabledDiagnosticCalled = 0u;
|
||||
uint32_t preinitializeRelaxedOrderingSectionsCalled = 0;
|
||||
uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0;
|
||||
uint32_t dispatchRelaxedOrderingSchedulerSectionCalled = 0;
|
||||
uint32_t dispatchRelaxedOrderingQueueStallCalled = 0;
|
||||
uint32_t dispatchTaskStoreSectionCalled = 0;
|
||||
uint32_t makeResourcesResidentVectorSize = 0u;
|
||||
bool allocateOsResourcesReturn = true;
|
||||
bool submitReturn = true;
|
||||
|
||||
@@ -592,7 +592,9 @@ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenI
|
||||
aubCsr.initializeTagAllocation();
|
||||
|
||||
LinearStream cs(commandBuffer);
|
||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
||||
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||
batchBuffer.startOffset = 1;
|
||||
|
||||
ResidencyContainer allocationsForResidency;
|
||||
|
||||
aubCsr.flush(batchBuffer, allocationsForResidency);
|
||||
|
||||
@@ -95,6 +95,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
||||
|
||||
CommandStreamReceiver *csr = nullptr;
|
||||
std::unique_ptr<MockDevice> device;
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||
GraphicsAllocation *alloc = nullptr;
|
||||
};
|
||||
|
||||
@@ -467,7 +467,9 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledTh
|
||||
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
||||
|
||||
LinearStream cs(commandBuffer);
|
||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
||||
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||
batchBuffer.startOffset = 1;
|
||||
|
||||
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0x1000);
|
||||
ResidencyContainer allocationsForResidency = {&allocation};
|
||||
|
||||
@@ -493,7 +495,8 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverInBatchedModeWhenFl
|
||||
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
||||
|
||||
LinearStream cs(commandBuffer);
|
||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
||||
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||
batchBuffer.startOffset = 1;
|
||||
ResidencyContainer allocationsForResidency;
|
||||
|
||||
tbxCsr.flush(batchBuffer, allocationsForResidency);
|
||||
|
||||
@@ -380,7 +380,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThen
|
||||
|
||||
directSubmission.stopRingBuffer();
|
||||
|
||||
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd();
|
||||
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd(false);
|
||||
EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize);
|
||||
EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize);
|
||||
EXPECT_EQ(oldQueueCount + 1, directSubmission.semaphoreData->QueueWorkCount);
|
||||
@@ -393,7 +393,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd();
|
||||
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd(false);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
@@ -405,7 +405,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
|
||||
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
||||
|
||||
size_t disabledSizeEnd = directSubmission.getSizeEnd();
|
||||
size_t disabledSizeEnd = directSubmission.getSizeEnd(false);
|
||||
EXPECT_EQ(disabledSizeEnd, regularSizeEnd + tagUpdateSize);
|
||||
|
||||
directSubmission.tagValueSetValue = 0x4343123ull;
|
||||
@@ -441,7 +441,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExp
|
||||
bool ret = directSubmission.initialize(false, false);
|
||||
EXPECT_TRUE(ret);
|
||||
|
||||
directSubmission.dispatchSemaphoreSection(1u, false);
|
||||
directSubmission.dispatchSemaphoreSection(1u);
|
||||
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(false), directSubmission.ringCommandStream.getUsed());
|
||||
}
|
||||
|
||||
@@ -510,7 +510,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -526,7 +526,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -541,7 +541,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -554,7 +554,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -568,7 +568,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -581,7 +581,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetEndSizeThenExpectCorr
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
||||
MemoryConstants::cacheLineSize;
|
||||
size_t actualSize = directSubmission.getSizeEnd();
|
||||
size_t actualSize = directSubmission.getSizeEnd(false);
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
|
||||
@@ -872,7 +872,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection(false) +
|
||||
directSubmission.getDiagnosticModeSection();
|
||||
expectedSize += expectedExecCount * directSubmission.getSizeDispatch();
|
||||
expectedSize += expectedExecCount * directSubmission.getSizeDispatch(false);
|
||||
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
expectedSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
@@ -969,7 +969,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection(false);
|
||||
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch());
|
||||
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch(false));
|
||||
expectedSize += expectedExecCount * expectedDispatch;
|
||||
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
|
||||
@@ -224,7 +224,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||
|
||||
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
|
||||
HardwareParse hwParse;
|
||||
@@ -257,7 +257,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.disableMonitorFence = true;
|
||||
@@ -267,7 +267,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
|
||||
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
||||
|
||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
|
||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - tagUpdateSize));
|
||||
|
||||
directSubmission.tagValueSetValue = 0x4343123ull;
|
||||
@@ -306,7 +306,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
@@ -316,7 +316,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
|
||||
size_t flushSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo);
|
||||
|
||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
|
||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));
|
||||
|
||||
directSubmission.dispatchWorkloadSection(batchBuffer);
|
||||
@@ -354,7 +354,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
@@ -365,7 +365,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
size_t startSize = directSubmission.getSizeStartSection();
|
||||
size_t storeDataSize = Dispatcher::getSizeStoreDwordCommand();
|
||||
|
||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
|
||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize + storeDataSize));
|
||||
|
||||
directSubmission.workloadModeOneExpectedValue = 0x40u;
|
||||
@@ -398,7 +398,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
@@ -408,7 +408,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
|
||||
size_t startSize = directSubmission.getSizeStartSection();
|
||||
|
||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
|
||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize));
|
||||
|
||||
directSubmission.currentQueueWorkCount = 0x40u;
|
||||
@@ -463,7 +463,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||
|
||||
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
|
||||
HardwareParse hwParse;
|
||||
@@ -502,7 +502,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress);
|
||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||
|
||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
|
||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
|
||||
|
||||
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
@@ -542,7 +542,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||
|
||||
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
}
|
||||
|
||||
@@ -576,7 +576,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(submitSize, directSubmission.submitSize);
|
||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||
|
||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
|
||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
|
||||
|
||||
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
||||
EXPECT_TRUE(directSubmission.ringStart);
|
||||
@@ -949,6 +949,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
|
||||
bool verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
FlushStampTracker flushStamp{true};
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
@@ -1668,7 +1669,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat
|
||||
directSubmission.startRingBuffer();
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
}
|
||||
@@ -1773,7 +1773,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
|
||||
directSubmission.initialize(true, false);
|
||||
auto offset = directSubmission.ringCommandStream.getUsed() + directSubmission.getSizeStartSection() + RelaxedOrderingHelper::getSizeReturnPtrRegs<FamilyType>();
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
auto taskStoreSection = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
@@ -1840,13 +1840,12 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, givenNotEnoughSpaceForTaskStoreSe
|
||||
directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
auto sizeToConsume = directSubmission.ringCommandStream.getAvailableSpace() -
|
||||
(directSubmission.getSizeDispatch() + directSubmission.getSizeEnd() + directSubmission.getSizeSwitchRingBufferSection());
|
||||
(directSubmission.getSizeDispatch(false) + directSubmission.getSizeEnd(false) + directSubmission.getSizeSwitchRingBufferSection());
|
||||
|
||||
directSubmission.ringCommandStream.getSpace(sizeToConsume);
|
||||
|
||||
auto oldAllocation = directSubmission.ringCommandStream.getGraphicsAllocation();
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_NE(oldAllocation, directSubmission.ringCommandStream.getGraphicsAllocation());
|
||||
@@ -1867,7 +1866,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchS
|
||||
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
@@ -1896,8 +1895,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = false;
|
||||
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
@@ -1909,7 +1908,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
@@ -1958,7 +1957,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen
|
||||
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = true;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
@@ -1999,8 +1997,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = false;
|
||||
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
@@ -2011,7 +2009,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
directSubmission.stopRingBuffer();
|
||||
|
||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
@@ -2122,6 +2120,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
|
||||
|
||||
auto originalBbStart = *reinterpret_cast<MI_BATCH_BUFFER_START *>(batchBuffer.endCmdPtr);
|
||||
|
||||
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||
directSubmission.dispatchWorkloadSection(batchBuffer);
|
||||
|
||||
uint64_t returnPtr = directSubmission.ringCommandStream.getGpuBase() + offset + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + directSubmission.getSizeStartSection();
|
||||
@@ -2136,3 +2135,411 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
|
||||
|
||||
EXPECT_EQ(0, memcmp(&originalBbStart, batchBuffer.endCmdPtr, sizeof(MI_BATCH_BUFFER_START)));
|
||||
}
|
||||
|
||||
// Checks how many times the relaxed ordering scheduler section, queue stall and task store
// section are dispatched when the first command buffer has both stalling commands and relaxed
// ordering dependencies, and is then followed by a second buffer (or by stopping the ring).
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using MockSubmission = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both flags on the batch buffer and dispatches it through the given submission.
    auto submitWithFlags = [this](MockSubmission &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    // Second buffer: non-stalling, with dependencies (counters are also checked after each step).
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, true, true);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, false, true);
        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: stalling, with dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, true);
        submitWithFlags(submission, true, true);

        EXPECT_EQ(3u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, true);
        submitWithFlags(submission, true, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, true);
        submitWithFlags(submission, false, false);

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // No second buffer: the ring is stopped right after the first dispatch.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, true);
        submission.stopRingBuffer();

        EXPECT_EQ(2u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }
}
|
||||
|
||||
// Checks how many times the relaxed ordering scheduler section, queue stall and task store
// section are dispatched when the first command buffer has relaxed ordering dependencies but no
// stalling commands, and is then followed by a second buffer with each combination of the two
// flags (or by stopping the ring).
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    // Second buffer: stalling, with dependencies (counters are also checked after each step).
    {
        MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        directSubmission.initialize(true, false);

        EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
        EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);

        batchBuffer.hasStallingCmds = true;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
        EXPECT_EQ(3u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, directSubmission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, without dependencies.
    {
        MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        directSubmission.initialize(true, false);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = false;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
        EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: stalling, without dependencies.
    {
        MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        directSubmission.initialize(true, false);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);

        batchBuffer.hasStallingCmds = true;
        batchBuffer.hasRelaxedOrderingDependencies = false;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
        EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, with dependencies.
    // This scenario previously repeated the non-stalling/no-dependencies case verbatim, leaving
    // this combination untested. Expected values mirror the equivalent follow-up dispatch in
    // givenBbWithStallingCmdsAndDependencies... (2 scheduler sections, no stall, 2 task stores).
    // NOTE(review): expectations are inferred from that sibling test - confirm on hardware run.
    {
        MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        directSubmission.initialize(true, false);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
        EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(2u, directSubmission.dispatchTaskStoreSectionCalled);
    }

    // No second buffer: the ring is stopped right after the first dispatch.
    {
        MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        directSubmission.initialize(true, false);

        batchBuffer.hasStallingCmds = false;
        batchBuffer.hasRelaxedOrderingDependencies = true;
        directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);

        directSubmission.stopRingBuffer();
        EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
    }
}
|
||||
|
||||
// Checks how many times the relaxed ordering scheduler section, queue stall and task store
// section are dispatched when the first command buffer has stalling commands but no relaxed
// ordering dependencies, and is then followed by a second buffer (or by stopping the ring).
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using MockSubmission = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both flags on the batch buffer and dispatches it through the given submission.
    auto submitWithFlags = [this](MockSubmission &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    // Second buffer: stalling, with dependencies (counters are also checked after each step).
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, true, false);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, true, true);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, false);
        submitWithFlags(submission, true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, with dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, false);
        submitWithFlags(submission, false, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, false);
        submitWithFlags(submission, false, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    // No second buffer: the ring is stopped right after the first dispatch.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, true, false);
        submission.stopRingBuffer();

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }
}
|
||||
|
||||
// Checks how many times the relaxed ordering scheduler section, queue stall and task store
// section are dispatched when the first command buffer has neither stalling commands nor relaxed
// ordering dependencies, and is then followed by a second buffer (or by stopping the ring).
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
    using MockSubmission = MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>>;

    // Sets both flags on the batch buffer and dispatches it through the given submission.
    auto submitWithFlags = [this](MockSubmission &submission, bool stallingCmds, bool relaxedOrderingDependencies) {
        batchBuffer.hasStallingCmds = stallingCmds;
        batchBuffer.hasRelaxedOrderingDependencies = relaxedOrderingDependencies;
        submission.dispatchCommandBuffer(batchBuffer, flushStamp);
    };

    // Second buffer: stalling, with dependencies (counters are also checked after each step).
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, false, false);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);

        submitWithFlags(submission, true, true);
        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, false, false);
        submitWithFlags(submission, false, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: non-stalling, with dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, false, false);
        submitWithFlags(submission, false, true);

        EXPECT_EQ(1u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(1u, submission.dispatchTaskStoreSectionCalled);
    }

    // Second buffer: stalling, without dependencies.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, false, false);
        submitWithFlags(submission, true, false);

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }

    // No second buffer: the ring is stopped right after the first dispatch.
    {
        MockSubmission submission(*pDevice->getDefaultEngine().commandStreamReceiver);
        submission.initialize(true, false);

        submitWithFlags(submission, false, false);
        submission.stopRingBuffer();

        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingSchedulerSectionCalled);
        EXPECT_EQ(0u, submission.dispatchRelaxedOrderingQueueStallCalled);
        EXPECT_EQ(0u, submission.dispatchTaskStoreSectionCalled);
    }
}
|
||||
|
||||
// Compares getSizeSemaphoreSection() and getSizeEnd(), queried with the relaxed ordering
// scheduler flag set and cleared, against sizes rebuilt from the individual command sizes.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenRelaxedOrderingSchedulerRequiredWhenAskingForCmdsSizeThenReturnCorrectValue, IsAtLeastXeHpcCore) {
    using Dispatcher = RenderDispatcher<FamilyType>;
    using SchedulerSection = RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>;

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    const auto &hwInfo = pDevice->getHardwareInfo();

    // Part of the semaphore section that is the same with and without the scheduler.
    size_t commonSemaphoreSize = directSubmission.getSizePrefetchMitigation();
    if (directSubmission.isDisablePrefetcherRequired) {
        commonSemaphoreSize += 2 * directSubmission.getSizeDisablePrefetcher();
    }
    if (directSubmission.miMemFenceRequired) {
        commonSemaphoreSize += MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(hwInfo);
    }

    const size_t semaphoreSizeWithScheduler = commonSemaphoreSize + SchedulerSection::totalSize;
    const size_t semaphoreSizeWithoutScheduler = commonSemaphoreSize + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait();

    EXPECT_EQ(semaphoreSizeWithScheduler, directSubmission.getSizeSemaphoreSection(true));
    EXPECT_EQ(semaphoreSizeWithoutScheduler, directSubmission.getSizeSemaphoreSection(false));

    // Part of the end section that is the same with and without the scheduler.
    size_t commonEndSize = Dispatcher::getSizeStopCommandBuffer() +
                           Dispatcher::getSizeCacheFlush(hwInfo) +
                           (Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
                           MemoryConstants::cacheLineSize;
    if (directSubmission.disableMonitorFence) {
        commonEndSize += Dispatcher::getSizeMonitorFence(hwInfo);
    }

    // With the scheduler flag set, the end section additionally contains the queue stall.
    const size_t endSizeWithScheduler = commonEndSize + directSubmission.getSizeDispatchRelaxedOrderingQueueStall();

    EXPECT_EQ(endSizeWithScheduler, directSubmission.getSizeEnd(true));
    EXPECT_EQ(commonEndSize, directSubmission.getSizeEnd(false));
}
|
||||
|
||||
// Checks the MI_LOAD_REGISTER_IMM commands emitted by dispatchRelaxedOrderingReturnPtrRegs():
//  - with the last argument set to true, CS_GPR_R4 receives the return pointer and CS_GPR_R3 receives
//    the return pointer advanced by the task store section size,
//  - with the last argument set to false, both CS_GPR_R4 and CS_GPR_R3 receive the unmodified return pointer.
// Each 64-bit value is expected as two consecutive LRI commands: low dword at the register, high dword at register + 4.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenSchedulerRequiredWhenDispatchingReturnPtrsThenAddOffset, IsAtLeastXeHpcCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.initialize(true, false);

    auto &ringStream = directSubmission.ringCommandStream;

    // Verifies two consecutive LRI commands that together load a 64-bit value into a register pair.
    auto expectLriPair = [](MI_LOAD_REGISTER_IMM *lri, uint32_t baseRegister, uint64_t value) {
        EXPECT_TRUE(verifyLri<FamilyType>(lri, baseRegister, static_cast<uint32_t>(value & 0xFFFF'FFFFULL)));
        ++lri;
        EXPECT_TRUE(verifyLri<FamilyType>(lri, baseRegister + 4, static_cast<uint32_t>(value >> 32)));
    };

    uint64_t returnPtr = 0x800100123000;
    uint64_t returnPtr2 = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<FamilyType>();

    // First dispatch: offset requested, so R3 is expected to point past the task store section.
    size_t offset = ringStream.getUsed();
    directSubmission.dispatchRelaxedOrderingReturnPtrRegs(ringStream, returnPtr, true);

    auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(ringStream.getCpuBase(), offset));
    expectLriPair(lriCmd, CS_GPR_R4, returnPtr);
    expectLriPair(lriCmd + 2, CS_GPR_R3, returnPtr2);

    // Second dispatch: no offset requested, so both register pairs carry the same pointer.
    offset = ringStream.getUsed();
    directSubmission.dispatchRelaxedOrderingReturnPtrRegs(ringStream, returnPtr, false);

    lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(ringStream.getCpuBase(), offset));
    expectLriPair(lriCmd, CS_GPR_R4, returnPtr);
    expectLriPair(lriCmd + 2, CS_GPR_R3, returnPtr);
}
|
||||
@@ -288,7 +288,8 @@ TEST_F(WddmCommandStreamTest, GivenOffsetWhenFlushingThenFlushIsSubmittedCorrect
|
||||
ASSERT_NE(nullptr, commandBuffer);
|
||||
LinearStream cs(commandBuffer);
|
||||
|
||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
||||
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||
batchBuffer.startOffset = offset;
|
||||
csr->flush(batchBuffer, csr->getResidencyAllocations());
|
||||
EXPECT_EQ(1u, wddm->submitResult.called);
|
||||
EXPECT_TRUE(wddm->submitResult.success);
|
||||
@@ -1165,7 +1166,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR
|
||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||
Dispatcher::getSizePreemption() +
|
||||
directSubmission->getSizeDispatch();
|
||||
directSubmission->getSizeDispatch(false);
|
||||
|
||||
if (directSubmission->miMemFenceRequired) {
|
||||
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
||||
@@ -1206,7 +1207,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB
|
||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||
Dispatcher::getSizePreemption() +
|
||||
directSubmission->getSizeDispatch();
|
||||
directSubmission->getSizeDispatch(false);
|
||||
|
||||
if (directSubmission->miMemFenceRequired) {
|
||||
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
||||
|
||||
Reference in New Issue
Block a user