mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
RelaxedOrdering: Improve dependencies tracking
Avoid unnecessary scheduler programming. Related-To: NEO-7458. Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ad6237478f
commit
3f962bf3e8
@@ -124,12 +124,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
|||||||
uint32_t numWaitEvents,
|
uint32_t numWaitEvents,
|
||||||
ze_event_handle_t *waitEventHandles) override;
|
ze_event_handle_t *waitEventHandles) override;
|
||||||
|
|
||||||
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds);
|
MOCKABLE_VIRTUAL ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies);
|
||||||
|
|
||||||
void checkAvailableSpace();
|
void checkAvailableSpace();
|
||||||
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
void updateDispatchFlagsWithRequiredStreamState(NEO::DispatchFlags &dispatchFlags);
|
||||||
|
|
||||||
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent);
|
ze_result_t flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent);
|
||||||
|
|
||||||
void createLogicalStateHelper() override {}
|
void createLogicalStateHelper() override {}
|
||||||
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
|
NEO::LogicalStateHelper *getLogicalStateHelper() const override;
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) {
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) {
|
||||||
NEO::DispatchFlags dispatchFlags(
|
NEO::DispatchFlags dispatchFlags(
|
||||||
{}, // csrDependencies
|
{}, // csrDependencies
|
||||||
nullptr, // barrierTimestampPacketNodes
|
nullptr, // barrierTimestampPacketNodes
|
||||||
@@ -100,7 +100,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
|
|||||||
this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext
|
this->device->getNEODevice()->getNumGenericSubDevices() > 1, // areMultipleSubDevicesInContext
|
||||||
false, // memoryMigrationRequired
|
false, // memoryMigrationRequired
|
||||||
false, // textureCacheFlush
|
false, // textureCacheFlush
|
||||||
hasStallingCmds // hasStallingCmds
|
hasStallingCmds, // hasStallingCmds
|
||||||
|
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
||||||
);
|
);
|
||||||
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
|
this->updateDispatchFlagsWithRequiredStreamState(dispatchFlags);
|
||||||
|
|
||||||
@@ -260,7 +261,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernel(
|
|||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernelHandle, threadGroupDimensions,
|
||||||
hSignalEvent, numWaitEvents, phWaitEvents,
|
hSignalEvent, numWaitEvents, phWaitEvents,
|
||||||
launchParams);
|
launchParams);
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -273,7 +274,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchKernelInd
|
|||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(kernelHandle, pDispatchArgumentsBuffer,
|
||||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -289,7 +290,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendBarrier(
|
|||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
this->dependenciesPresent = true;
|
this->dependenciesPresent = true;
|
||||||
return flushImmediate(ret, true, true, hSignalEvent);
|
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -323,7 +324,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopy(
|
|||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent,
|
||||||
numWaitEvents, phWaitEvents);
|
numWaitEvents, phWaitEvents);
|
||||||
}
|
}
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -366,7 +367,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryCopyRegio
|
|||||||
hSignalEvent, numWaitEvents, phWaitEvents);
|
hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
}
|
}
|
||||||
|
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -381,7 +382,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryFill(void
|
|||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -393,7 +394,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendSignalEvent(ze_
|
|||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEvent);
|
||||||
return flushImmediate(ret, true, true, hSignalEvent);
|
return flushImmediate(ret, true, true, false, hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -405,7 +406,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendEventReset(ze_e
|
|||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendEventReset(hSignalEvent);
|
||||||
return flushImmediate(ret, true, true, hSignalEvent);
|
return flushImmediate(ret, true, true, false, hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -431,7 +432,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
|
|||||||
} else {
|
} else {
|
||||||
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
ret = CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost);
|
||||||
}
|
}
|
||||||
return flushImmediate(ret, false, false, nullptr);
|
return flushImmediate(ret, false, false, false, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -448,7 +449,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWaitOnEvents(ui
|
|||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWaitOnEvents(numEvents, phWaitEvents);
|
||||||
this->dependenciesPresent = true;
|
this->dependenciesPresent = true;
|
||||||
return flushImmediate(ret, true, true, nullptr);
|
return flushImmediate(ret, true, true, (numEvents > 0), nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -461,7 +462,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendWriteGlobalTime
|
|||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendWriteGlobalTimestamp(dstptr, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
return flushImmediate(ret, true, true, hSignalEvent);
|
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -497,7 +498,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
|
|||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
|
||||||
numWaitEvents, phWaitEvents);
|
numWaitEvents, phWaitEvents);
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -515,7 +516,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
|
|||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
|
||||||
numWaitEvents, phWaitEvents);
|
numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -533,7 +534,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
|||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
|
||||||
numWaitEvents, phWaitEvents);
|
numWaitEvents, phWaitEvents);
|
||||||
|
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -547,7 +548,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBar
|
|||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(numRanges, pRangeSizes, pRanges, hSignalEvent, numWaitEvents, phWaitEvents);
|
||||||
return flushImmediate(ret, true, true, hSignalEvent);
|
return flushImmediate(ret, true, true, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
@@ -560,14 +561,15 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendLaunchCooperati
|
|||||||
checkAvailableSpace();
|
checkAvailableSpace();
|
||||||
}
|
}
|
||||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles);
|
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchCooperativeKernel(kernelHandle, launchKernelArgs, hSignalEvent, numWaitEvents, waitEventHandles);
|
||||||
return flushImmediate(ret, true, false, hSignalEvent);
|
return flushImmediate(ret, true, false, (numWaitEvents > 0), hSignalEvent);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, ze_event_handle_t hSignalEvent) {
|
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds,
|
||||||
|
bool hasRelaxedOrderingDependencies, ze_event_handle_t hSignalEvent) {
|
||||||
if (inputRet == ZE_RESULT_SUCCESS) {
|
if (inputRet == ZE_RESULT_SUCCESS) {
|
||||||
if (this->isFlushTaskSubmissionEnabled) {
|
if (this->isFlushTaskSubmissionEnabled) {
|
||||||
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds);
|
inputRet = executeCommandListImmediateWithFlushTask(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies);
|
||||||
} else {
|
} else {
|
||||||
inputRet = executeCommandListImmediate(performMigration);
|
inputRet = executeCommandListImmediate(performMigration);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -99,7 +99,7 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
|
|||||||
|
|
||||||
NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
|
NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
|
||||||
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
|
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
|
||||||
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false);
|
commandStream.getUsed(), &commandStream, endingCmdPtr, isCooperative, false, false);
|
||||||
|
|
||||||
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
||||||
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
|
||||||
|
|||||||
@@ -515,7 +515,7 @@ class MockCommandListImmediateHw : public WhiteBox<::L0::CommandListCoreFamilyIm
|
|||||||
return executeCommandListImmediateReturnValue;
|
return executeCommandListImmediateReturnValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds) override {
|
ze_result_t executeCommandListImmediateWithFlushTask(bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies) override {
|
||||||
++executeCommandListImmediateWithFlushTaskCalledCount;
|
++executeCommandListImmediateWithFlushTaskCalledCount;
|
||||||
return executeCommandListImmediateWithFlushTaskReturnValue;
|
return executeCommandListImmediateWithFlushTaskReturnValue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1068,6 +1068,118 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh
|
|||||||
driverHandle->releaseImportedPointer(dstPtr);
|
driverHandle->releaseImportedPointer(dstPtr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingThenPassRelaxedOrderingDependenciesInfo, IsAtLeastXeHpcCore) {
|
||||||
|
ze_command_queue_desc_t desc = {};
|
||||||
|
desc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
|
||||||
|
ze_result_t returnValue;
|
||||||
|
std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
|
ASSERT_NE(nullptr, commandList);
|
||||||
|
|
||||||
|
ze_event_pool_desc_t eventPoolDesc = {};
|
||||||
|
eventPoolDesc.count = 1;
|
||||||
|
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
|
||||||
|
|
||||||
|
ze_event_desc_t eventDesc = {};
|
||||||
|
eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST;
|
||||||
|
|
||||||
|
ze_event_handle_t event = nullptr;
|
||||||
|
|
||||||
|
std::unique_ptr<L0::EventPool> eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||||
|
|
||||||
|
ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event));
|
||||||
|
std::unique_ptr<L0::Event> eventObject(L0::Event::fromHandle(event));
|
||||||
|
|
||||||
|
Mock<::L0::Kernel> kernel;
|
||||||
|
ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
|
|
||||||
|
uint8_t srcPtr[64] = {};
|
||||||
|
uint8_t dstPtr[64] = {};
|
||||||
|
const ze_copy_region_t region = {0U, 0U, 0U, 1, 1, 0U};
|
||||||
|
|
||||||
|
driverHandle->importExternalPointer(dstPtr, MemoryConstants::pageSize);
|
||||||
|
|
||||||
|
auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(commandList->csr);
|
||||||
|
ultCsr->recordFlusheBatchBuffer = true;
|
||||||
|
|
||||||
|
auto verifyFlags = [&ultCsr](ze_result_t result, bool dispatchFlag, bool bbFlag) {
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
EXPECT_EQ(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies, dispatchFlag);
|
||||||
|
EXPECT_EQ(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies, bbFlag);
|
||||||
|
};
|
||||||
|
|
||||||
|
for (bool hasEventDependencies : {true, false}) {
|
||||||
|
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
|
||||||
|
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist, launchParams),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendBarrier(nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendMemoryCopy(dstPtr, srcPtr, 8, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendMemoryCopyRegion(dstPtr, &region, 0, 0, srcPtr, &region, 0, 0, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendMemoryFill(dstPtr, srcPtr, 8, 1, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendEventReset(event), false, false);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendSignalEvent(event), false, false);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false),
|
||||||
|
false, false);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendWaitOnEvents(1, &event), true, true);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendWriteGlobalTimestamp(reinterpret_cast<uint64_t *>(dstPtr), nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
if constexpr (FamilyType::supportsSampler) {
|
||||||
|
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::CopyImageRegion);
|
||||||
|
auto mockBuiltinKernel = static_cast<Mock<::L0::Kernel> *>(kernel);
|
||||||
|
mockBuiltinKernel->setArgRedescribedImageCallBase = false;
|
||||||
|
|
||||||
|
auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
|
||||||
|
ze_image_region_t imgRegion = {1, 1, 1, 1, 1, 1};
|
||||||
|
ze_image_desc_t zeDesc = {};
|
||||||
|
zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
|
||||||
|
image->initialize(device, &zeDesc);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendImageCopyRegion(image->toHandle(), image->toHandle(), &imgRegion, &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendImageCopyFromMemory(image->toHandle(), dstPtr, &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
|
||||||
|
verifyFlags(commandList->appendImageCopyToMemory(dstPtr, image->toHandle(), &imgRegion, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t rangeSizes = 1;
|
||||||
|
const void **ranges = reinterpret_cast<const void **>(&dstPtr[0]);
|
||||||
|
verifyFlags(commandList->appendMemoryRangesBarrier(1, &rangeSizes, ranges, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (bool hasEventDependencies : {true, false}) {
|
||||||
|
ze_event_handle_t *waitlist = hasEventDependencies ? &event : nullptr;
|
||||||
|
uint32_t numWaitlistEvents = hasEventDependencies ? 1 : 0;
|
||||||
|
verifyFlags(commandList->appendLaunchCooperativeKernel(kernel.toHandle(), &groupCount, nullptr, numWaitlistEvents, waitlist),
|
||||||
|
hasEventDependencies, hasEventDependencies);
|
||||||
|
}
|
||||||
|
|
||||||
|
driverHandle->releaseImportedPointer(dstPtr);
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
|
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
DebugManager.flags.EnableFlushTaskSubmission.set(1);
|
||||||
|
|||||||
@@ -73,7 +73,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
|||||||
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
|
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 1;
|
||||||
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
|
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 1;
|
||||||
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
|
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::RoundRobin;
|
||||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||||
|
|
||||||
NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {};
|
NEO::StateComputeModePropertiesSupport scmPropertiesSupport = {};
|
||||||
hwInfoConfig.fillScmPropertiesSupportStructure(scmPropertiesSupport);
|
hwInfoConfig.fillScmPropertiesSupportStructure(scmPropertiesSupport);
|
||||||
@@ -102,7 +102,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
|||||||
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
|
commandListImmediate.requiredStreamState.stateComputeMode.isCoherencyRequired.value = 0;
|
||||||
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
|
commandListImmediate.requiredStreamState.stateComputeMode.largeGrfMode.value = 0;
|
||||||
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
|
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
|
||||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||||
|
|
||||||
expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1;
|
expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 0 : -1;
|
||||||
expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? 0 : -1;
|
expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? 0 : -1;
|
||||||
@@ -128,7 +128,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
|||||||
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
||||||
|
|
||||||
commandListImmediate.containsAnyKernel = true;
|
commandListImmediate.containsAnyKernel = true;
|
||||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||||
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
|
EXPECT_FALSE(commandListImmediate.containsAnyKernel);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -139,7 +139,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
|||||||
commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
commandList.reset(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue));
|
||||||
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
auto &commandListImmediate = static_cast<MockCommandListImmediate<gfxCoreFamily> &>(*commandList);
|
||||||
|
|
||||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
EXPECT_EQ(ZE_RESULT_SUCCESS, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
||||||
@@ -151,7 +151,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfHostMemoryErrorOnFlushWhenExecu
|
|||||||
|
|
||||||
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
|
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY;
|
||||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExecutingCommandListImmediateWithFlushTaskThenProperErrorIsReturned, IsAtLeastSkl) {
|
||||||
@@ -163,7 +163,7 @@ HWTEST2_F(CommandListExecuteImmediate, givenOutOfDeviceMemoryErrorOnFlushWhenExe
|
|||||||
|
|
||||||
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
auto &commandStreamReceiver = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
|
commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY;
|
||||||
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false));
|
EXPECT_EQ(ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY, commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false));
|
||||||
}
|
}
|
||||||
|
|
||||||
using CommandListTest = Test<DeviceFixture>;
|
using CommandListTest = Test<DeviceFixture>;
|
||||||
@@ -351,7 +351,7 @@ HWTEST2_F(CommandListTest, givenImmediateCommandListWhenFlushImmediateThenOverri
|
|||||||
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
|
MockCommandStreamReceiver mockCommandStreamReceiver(*neoDevice->executionEnvironment, neoDevice->getRootDeviceIndex(), neoDevice->getDeviceBitfield());
|
||||||
cmdList.csr = event->csr;
|
cmdList.csr = event->csr;
|
||||||
event->csr = &mockCommandStreamReceiver;
|
event->csr = &mockCommandStreamReceiver;
|
||||||
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, event->toHandle());
|
cmdList.flushImmediate(ZE_RESULT_SUCCESS, false, false, false, event->toHandle());
|
||||||
EXPECT_EQ(event->csr, cmdList.csr);
|
EXPECT_EQ(event->csr, cmdList.csr);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -772,7 +772,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithIndirectAccessWhenExecutingCommandLis
|
|||||||
auto oldCommandQueue = commandList->cmdQImmediate;
|
auto oldCommandQueue = commandList->cmdQImmediate;
|
||||||
commandList->cmdQImmediate = &mockCommandQueue;
|
commandList->cmdQImmediate = &mockCommandQueue;
|
||||||
commandListImmediate.indirectAllocationsAllowed = true;
|
commandListImmediate.indirectAllocationsAllowed = true;
|
||||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||||
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
|
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 1u);
|
||||||
commandList->cmdQImmediate = oldCommandQueue;
|
commandList->cmdQImmediate = oldCommandQueue;
|
||||||
}
|
}
|
||||||
@@ -791,7 +791,7 @@ HWTEST2_F(CommandListTest, givenCmdListWithNoIndirectAccessWhenExecutingCommandL
|
|||||||
auto oldCommandQueue = commandList->cmdQImmediate;
|
auto oldCommandQueue = commandList->cmdQImmediate;
|
||||||
commandList->cmdQImmediate = &mockCommandQueue;
|
commandList->cmdQImmediate = &mockCommandQueue;
|
||||||
commandListImmediate.indirectAllocationsAllowed = false;
|
commandListImmediate.indirectAllocationsAllowed = false;
|
||||||
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false);
|
commandListImmediate.executeCommandListImmediateWithFlushTask(false, false, false);
|
||||||
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
|
EXPECT_EQ(mockCommandQueue.handleIndirectAllocationResidencyCalledTimes, 0u);
|
||||||
commandList->cmdQImmediate = oldCommandQueue;
|
commandList->cmdQImmediate = oldCommandQueue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -804,7 +804,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
|
|||||||
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
||||||
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
||||||
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
|
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
|
||||||
false); // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false); // hasRelaxedOrderingDependencies
|
||||||
|
|
||||||
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
|
||||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
|
||||||
@@ -1050,7 +1051,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
|
|||||||
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
context->containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
||||||
false, // memoryMigrationRequired
|
false, // memoryMigrationRequired
|
||||||
false, // textureCacheFlush
|
false, // textureCacheFlush
|
||||||
false); // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false); // hasRelaxedOrderingDependencies
|
||||||
|
|
||||||
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();
|
||||||
|
|
||||||
|
|||||||
@@ -80,7 +80,8 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
|||||||
false, // areMultipleSubDevicesInContext
|
false, // areMultipleSubDevicesInContext
|
||||||
false, // memoryMigrationRequired
|
false, // memoryMigrationRequired
|
||||||
false, // textureCacheFlush
|
false, // textureCacheFlush
|
||||||
false); // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false); // hasRelaxedOrderingDependencies
|
||||||
|
|
||||||
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
|
||||||
|
|
||||||
@@ -210,7 +211,8 @@ CompletionStamp &CommandComputeKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
|
||||||
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
kernel->requiresMemoryMigration(), // memoryMigrationRequired
|
||||||
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
|
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
|
||||||
false); // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false); // hasRelaxedOrderingDependencies
|
||||||
|
|
||||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
@@ -385,7 +387,8 @@ CompletionStamp &CommandWithoutKernel::submit(uint32_t taskLevel, bool terminate
|
|||||||
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
commandQueue.getContext().containsMultipleSubDevices(rootDeviceIndex), // areMultipleSubDevicesInContext
|
||||||
false, // memoryMigrationRequired
|
false, // memoryMigrationRequired
|
||||||
false, // textureCacheFlush
|
false, // textureCacheFlush
|
||||||
false); // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false); // hasRelaxedOrderingDependencies
|
||||||
|
|
||||||
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
|
||||||
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
eventsRequest.fillCsrDependenciesForTaskCountContainer(dispatchFlags.csrDependencies, commandStreamReceiver);
|
||||||
|
|||||||
@@ -615,7 +615,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||||||
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
|
auto &streamToSubmit = submitCommandStreamFromCsr ? commandStreamCSR : commandStreamTask;
|
||||||
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
|
BatchBuffer batchBuffer{streamToSubmit.getGraphicsAllocation(), startOffset, chainedBatchBufferStartOffset, taskStartAddress, chainedBatchBuffer,
|
||||||
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
|
dispatchFlags.requiresCoherency, dispatchFlags.lowPriority, dispatchFlags.throttle, dispatchFlags.sliceCount,
|
||||||
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds)};
|
streamToSubmit.getUsed(), &streamToSubmit, bbEndLocation, dispatchFlags.useSingleSubdevice, (submitCSR || dispatchFlags.hasStallingCmds),
|
||||||
|
dispatchFlags.hasRelaxedOrderingDependencies};
|
||||||
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
streamToSubmit.getGraphicsAllocation()->updateTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||||
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
streamToSubmit.getGraphicsAllocation()->updateResidencyTaskCount(this->taskCount + 1, this->osContext->getContextId());
|
||||||
|
|
||||||
@@ -1178,7 +1179,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::flushBcsTask(const BlitPropertiesCo
|
|||||||
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
|
uint64_t taskStartAddress = commandStream.getGpuBase() + commandStreamStart;
|
||||||
|
|
||||||
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
BatchBuffer batchBuffer{commandStream.getGraphicsAllocation(), commandStreamStart, 0, taskStartAddress, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||||
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false};
|
commandStream.getUsed(), &commandStream, endingCmdPtr, false, false, false};
|
||||||
|
|
||||||
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
|
commandStream.getGraphicsAllocation()->updateTaskCount(newTaskCount, this->osContext->getContextId());
|
||||||
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
|
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(newTaskCount, this->osContext->getContextId());
|
||||||
@@ -1290,7 +1291,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushSmallTask(LinearStream
|
|||||||
|
|
||||||
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
|
BatchBuffer batchBuffer{commandStreamTask.getGraphicsAllocation(), commandStreamStartTask, 0, taskStartAddress,
|
||||||
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount,
|
||||||
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true};
|
commandStreamTask.getUsed(), &commandStreamTask, endingCmdPtr, false, true, false};
|
||||||
|
|
||||||
this->latestSentTaskCount = taskCount + 1;
|
this->latestSentTaskCount = taskCount + 1;
|
||||||
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
|
auto submissionStatus = flushHandler(batchBuffer, getResidencyAllocations());
|
||||||
|
|||||||
@@ -57,36 +57,37 @@ struct DispatchFlags {
|
|||||||
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
|
||||||
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
bool requiresCoherencyP, bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
|
||||||
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
|
bool usePerDSSbackedBufferP, bool useSingleSubdeviceP, bool useGlobalAtomicsP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
|
||||||
bool hasStallingCmds) : csrDependencies(csrDependenciesP),
|
bool hasStallingCmds, bool hasRelaxedOrderingDependencies) : csrDependencies(csrDependenciesP),
|
||||||
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
|
||||||
pipelineSelectArgs(pipelineSelectArgsP),
|
pipelineSelectArgs(pipelineSelectArgsP),
|
||||||
flushStampReference(flushStampReferenceP),
|
flushStampReference(flushStampReferenceP),
|
||||||
throttle(throttleP),
|
throttle(throttleP),
|
||||||
preemptionMode(preemptionModeP),
|
preemptionMode(preemptionModeP),
|
||||||
numGrfRequired(numGrfRequiredP),
|
numGrfRequired(numGrfRequiredP),
|
||||||
l3CacheSettings(l3CacheSettingsP),
|
l3CacheSettings(l3CacheSettingsP),
|
||||||
threadArbitrationPolicy(threadArbitrationPolicyP),
|
threadArbitrationPolicy(threadArbitrationPolicyP),
|
||||||
additionalKernelExecInfo(additionalKernelExecInfoP),
|
additionalKernelExecInfo(additionalKernelExecInfoP),
|
||||||
kernelExecutionType(kernelExecutionTypeP),
|
kernelExecutionType(kernelExecutionTypeP),
|
||||||
memoryCompressionState(memoryCompressionStateP),
|
memoryCompressionState(memoryCompressionStateP),
|
||||||
sliceCount(sliceCountP),
|
sliceCount(sliceCountP),
|
||||||
blocking(blockingP),
|
blocking(blockingP),
|
||||||
dcFlush(dcFlushP),
|
dcFlush(dcFlushP),
|
||||||
useSLM(useSLMP),
|
useSLM(useSLMP),
|
||||||
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
|
||||||
gsba32BitRequired(gsba32BitRequiredP),
|
gsba32BitRequired(gsba32BitRequiredP),
|
||||||
requiresCoherency(requiresCoherencyP),
|
requiresCoherency(requiresCoherencyP),
|
||||||
lowPriority(lowPriorityP),
|
lowPriority(lowPriorityP),
|
||||||
implicitFlush(implicitFlushP),
|
implicitFlush(implicitFlushP),
|
||||||
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
|
||||||
epilogueRequired(epilogueRequiredP),
|
epilogueRequired(epilogueRequiredP),
|
||||||
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
usePerDssBackedBuffer(usePerDSSbackedBufferP),
|
||||||
useSingleSubdevice(useSingleSubdeviceP),
|
useSingleSubdevice(useSingleSubdeviceP),
|
||||||
useGlobalAtomics(useGlobalAtomicsP),
|
useGlobalAtomics(useGlobalAtomicsP),
|
||||||
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
|
||||||
memoryMigrationRequired(memoryMigrationRequiredP),
|
memoryMigrationRequired(memoryMigrationRequiredP),
|
||||||
textureCacheFlush(textureCacheFlush),
|
textureCacheFlush(textureCacheFlush),
|
||||||
hasStallingCmds(hasStallingCmds){};
|
hasStallingCmds(hasStallingCmds),
|
||||||
|
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies){};
|
||||||
|
|
||||||
CsrDependencies csrDependencies;
|
CsrDependencies csrDependencies;
|
||||||
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
|
||||||
@@ -119,6 +120,7 @@ struct DispatchFlags {
|
|||||||
bool memoryMigrationRequired = false;
|
bool memoryMigrationRequired = false;
|
||||||
bool textureCacheFlush = false;
|
bool textureCacheFlush = false;
|
||||||
bool hasStallingCmds = false;
|
bool hasStallingCmds = false;
|
||||||
|
bool hasRelaxedOrderingDependencies = false;
|
||||||
bool disableEUFusion = false;
|
bool disableEUFusion = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -100,14 +100,15 @@ void NEO::SubmissionAggregator::aggregateCommandBuffers(ResourcePackage &resourc
|
|||||||
|
|
||||||
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset,
|
NEO::BatchBuffer::BatchBuffer(GraphicsAllocation *commandBufferAllocation, size_t startOffset,
|
||||||
size_t chainedBatchBufferStartOffset, uint64_t taskStartAddress, GraphicsAllocation *chainedBatchBuffer,
|
size_t chainedBatchBufferStartOffset, uint64_t taskStartAddress, GraphicsAllocation *chainedBatchBuffer,
|
||||||
bool requiresCoherency, bool lowPriority,
|
bool requiresCoherency, bool lowPriority, QueueThrottle throttle, uint64_t sliceCount,
|
||||||
QueueThrottle throttle, uint64_t sliceCount,
|
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds,
|
||||||
size_t usedSize, LinearStream *stream, void *endCmdPtr, bool useSingleSubdevice, bool hasStallingCmds)
|
bool hasRelaxedOrderingDependencies)
|
||||||
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
|
: commandBufferAllocation(commandBufferAllocation), startOffset(startOffset),
|
||||||
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
|
chainedBatchBufferStartOffset(chainedBatchBufferStartOffset), taskStartAddress(taskStartAddress), chainedBatchBuffer(chainedBatchBuffer),
|
||||||
requiresCoherency(requiresCoherency), low_priority(lowPriority),
|
requiresCoherency(requiresCoherency), low_priority(lowPriority),
|
||||||
throttle(throttle), sliceCount(sliceCount),
|
throttle(throttle), sliceCount(sliceCount),
|
||||||
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds) {}
|
usedSize(usedSize), stream(stream), endCmdPtr(endCmdPtr), useSingleSubdevice(useSingleSubdevice), hasStallingCmds(hasStallingCmds),
|
||||||
|
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies) {}
|
||||||
|
|
||||||
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
|
NEO::CommandBuffer::CommandBuffer(Device &device) : device(device) {
|
||||||
flushStamp.reset(new FlushStampTracker(false));
|
flushStamp.reset(new FlushStampTracker(false));
|
||||||
|
|||||||
@@ -32,7 +32,8 @@ struct BatchBuffer {
|
|||||||
LinearStream *stream,
|
LinearStream *stream,
|
||||||
void *endCmdPtr,
|
void *endCmdPtr,
|
||||||
bool useSingleSubdevice,
|
bool useSingleSubdevice,
|
||||||
bool hasStallingCmds);
|
bool hasStallingCmds,
|
||||||
|
bool hasRelaxedOrderingDependencies);
|
||||||
BatchBuffer() {}
|
BatchBuffer() {}
|
||||||
GraphicsAllocation *commandBufferAllocation = nullptr;
|
GraphicsAllocation *commandBufferAllocation = nullptr;
|
||||||
size_t startOffset = 0u;
|
size_t startOffset = 0u;
|
||||||
@@ -52,6 +53,7 @@ struct BatchBuffer {
|
|||||||
|
|
||||||
bool useSingleSubdevice = false;
|
bool useSingleSubdevice = false;
|
||||||
bool hasStallingCmds = false;
|
bool hasStallingCmds = false;
|
||||||
|
bool hasRelaxedOrderingDependencies = false;
|
||||||
bool ringBufferRestartRequest = false;
|
bool ringBufferRestartRequest = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -114,12 +114,12 @@ class DirectSubmissionHw {
|
|||||||
|
|
||||||
void cpuCachelineFlush(void *ptr, size_t size);
|
void cpuCachelineFlush(void *ptr, size_t size);
|
||||||
|
|
||||||
void dispatchSemaphoreSection(uint32_t value, bool firstSubmission);
|
void dispatchSemaphoreSection(uint32_t value);
|
||||||
size_t getSizeSemaphoreSection(bool firstSubmission);
|
size_t getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired);
|
||||||
|
|
||||||
void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
|
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
|
||||||
|
|
||||||
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr);
|
void dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies);
|
||||||
|
|
||||||
void dispatchStartSection(uint64_t gpuStartAddress);
|
void dispatchStartSection(uint64_t gpuStartAddress);
|
||||||
size_t getSizeStartSection();
|
size_t getSizeStartSection();
|
||||||
@@ -127,10 +127,10 @@ class DirectSubmissionHw {
|
|||||||
void dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress);
|
void dispatchSwitchRingBufferSection(uint64_t nextBufferGpuAddress);
|
||||||
size_t getSizeSwitchRingBufferSection();
|
size_t getSizeSwitchRingBufferSection();
|
||||||
|
|
||||||
void dispatchRelaxedOrderingQueueStall();
|
MOCKABLE_VIRTUAL void dispatchRelaxedOrderingQueueStall();
|
||||||
size_t getSizeDispatchRelaxedOrderingQueueStall();
|
size_t getSizeDispatchRelaxedOrderingQueueStall();
|
||||||
|
|
||||||
void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
MOCKABLE_VIRTUAL void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
||||||
MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections();
|
MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections();
|
||||||
|
|
||||||
void initRelaxedOrderingRegisters();
|
void initRelaxedOrderingRegisters();
|
||||||
@@ -138,7 +138,7 @@ class DirectSubmissionHw {
|
|||||||
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
|
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
|
||||||
|
|
||||||
void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
|
void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
|
||||||
size_t getSizeDispatch();
|
size_t getSizeDispatch(bool relaxedOrderingSchedulerRequired);
|
||||||
|
|
||||||
void dispatchPrefetchMitigation();
|
void dispatchPrefetchMitigation();
|
||||||
size_t getSizePrefetchMitigation();
|
size_t getSizePrefetchMitigation();
|
||||||
@@ -148,7 +148,7 @@ class DirectSubmissionHw {
|
|||||||
|
|
||||||
MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler();
|
MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler();
|
||||||
|
|
||||||
size_t getSizeEnd();
|
size_t getSizeEnd(bool relaxedOrderingSchedulerRequired);
|
||||||
|
|
||||||
void dispatchPartitionRegisterConfiguration();
|
void dispatchPartitionRegisterConfiguration();
|
||||||
size_t getSizePartitionRegisterConfigurationSection();
|
size_t getSizePartitionRegisterConfigurationSection();
|
||||||
@@ -226,6 +226,6 @@ class DirectSubmissionHw {
|
|||||||
bool dcFlushRequired = false;
|
bool dcFlushRequired = false;
|
||||||
bool relaxedOrderingEnabled = false;
|
bool relaxedOrderingEnabled = false;
|
||||||
bool relaxedOrderingInitialized = false;
|
bool relaxedOrderingInitialized = false;
|
||||||
bool firstSubmissionAfterRingStart = true;
|
bool relaxedOrderingSchedulerRequired = false;
|
||||||
};
|
};
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -403,7 +403,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
|||||||
initDiagnostic(submitOnInit);
|
initDiagnostic(submitOnInit);
|
||||||
if (ret && submitOnInit) {
|
if (ret && submitOnInit) {
|
||||||
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
||||||
getSizeSemaphoreSection(true);
|
getSizeSemaphoreSection(false);
|
||||||
|
|
||||||
Dispatcher::dispatchPreemption(ringCommandStream);
|
Dispatcher::dispatchPreemption(ringCommandStream);
|
||||||
if (this->partitionedMode) {
|
if (this->partitionedMode) {
|
||||||
@@ -431,7 +431,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
|||||||
dispatchDiagnosticModeSection();
|
dispatchDiagnosticModeSection();
|
||||||
startBufferSize += getDiagnosticModeSection();
|
startBufferSize += getDiagnosticModeSection();
|
||||||
}
|
}
|
||||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||||
|
|
||||||
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
|
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
|
||||||
performDiagnosticMode();
|
performDiagnosticMode();
|
||||||
@@ -446,7 +446,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t startSize = getSizeSemaphoreSection(true);
|
size_t startSize = getSizeSemaphoreSection(false);
|
||||||
if (!this->partitionConfigSet) {
|
if (!this->partitionConfigSet) {
|
||||||
startSize += getSizePartitionRegisterConfigurationSection();
|
startSize += getSizePartitionRegisterConfigurationSection();
|
||||||
}
|
}
|
||||||
@@ -457,7 +457,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
|||||||
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
|
size_t requiredSize = startSize + getSizeDispatch(false) + getSizeEnd(false);
|
||||||
if (ringCommandStream.getAvailableSpace() < requiredSize) {
|
if (ringCommandStream.getAvailableSpace() < requiredSize) {
|
||||||
switchRingBuffers();
|
switchRingBuffers();
|
||||||
}
|
}
|
||||||
@@ -482,12 +482,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
currentQueueWorkCount++;
|
currentQueueWorkCount++;
|
||||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||||
|
|
||||||
ringStart = submit(gpuStartVa, startSize);
|
ringStart = submit(gpuStartVa, startSize);
|
||||||
|
|
||||||
firstSubmissionAfterRingStart = true;
|
|
||||||
|
|
||||||
return ringStart;
|
return ringStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -497,7 +495,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->relaxedOrderingEnabled && !firstSubmissionAfterRingStart) {
|
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
|
||||||
|
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
|
||||||
dispatchRelaxedOrderingQueueStall();
|
dispatchRelaxedOrderingQueueStall();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -515,7 +514,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
|||||||
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
|
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
|
||||||
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
|
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
|
||||||
|
|
||||||
cpuCachelineFlush(flushPtr, getSizeEnd());
|
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
|
||||||
this->unblockGpu();
|
this->unblockGpu();
|
||||||
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
||||||
|
|
||||||
@@ -526,13 +525,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value, bool firstSubmission) {
|
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
|
||||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||||
|
|
||||||
dispatchDisablePrefetcher(true);
|
dispatchDisablePrefetcher(true);
|
||||||
|
|
||||||
if (this->relaxedOrderingEnabled && !firstSubmission) {
|
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
|
||||||
dispatchRelaxedOrderingSchedulerSection(value);
|
dispatchRelaxedOrderingSchedulerSection(value);
|
||||||
} else {
|
} else {
|
||||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||||
@@ -550,9 +549,9 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool firstSubmission) {
|
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired) {
|
||||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
size_t semaphoreSize = (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||||
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||||
semaphoreSize += getSizePrefetchMitigation();
|
semaphoreSize += getSizePrefetchMitigation();
|
||||||
|
|
||||||
if (isDisablePrefetcherRequired) {
|
if (isDisablePrefetcherRequired) {
|
||||||
@@ -597,7 +596,7 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSwitchRingBuffer
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
|
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd(bool relaxedOrderingSchedulerRequired) {
|
||||||
size_t size = Dispatcher::getSizeStopCommandBuffer() +
|
size_t size = Dispatcher::getSizeStopCommandBuffer() +
|
||||||
Dispatcher::getSizeCacheFlush(*hwInfo) +
|
Dispatcher::getSizeCacheFlush(*hwInfo) +
|
||||||
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
||||||
@@ -605,15 +604,15 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd() {
|
|||||||
if (disableMonitorFence) {
|
if (disableMonitorFence) {
|
||||||
size += Dispatcher::getSizeMonitorFence(*hwInfo);
|
size += Dispatcher::getSizeMonitorFence(*hwInfo);
|
||||||
}
|
}
|
||||||
if (this->relaxedOrderingEnabled) {
|
if (this->relaxedOrderingEnabled && relaxedOrderingSchedulerRequired) {
|
||||||
size += getSizeDispatchRelaxedOrderingQueueStall();
|
size += getSizeDispatchRelaxedOrderingQueueStall();
|
||||||
}
|
}
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
|
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired) {
|
||||||
size_t size = getSizeSemaphoreSection(false);
|
size_t size = getSizeSemaphoreSection(relaxedOrderingSchedulerRequired);
|
||||||
if (workloadMode == 0) {
|
if (workloadMode == 0) {
|
||||||
size += getSizeStartSection();
|
size += getSizeStartSection();
|
||||||
if (this->relaxedOrderingEnabled) {
|
if (this->relaxedOrderingEnabled) {
|
||||||
@@ -673,7 +672,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
|||||||
uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition();
|
uint64_t returnGpuPointer = ringCommandStream.getCurrentGpuAddressPosition();
|
||||||
|
|
||||||
if (this->relaxedOrderingEnabled) {
|
if (this->relaxedOrderingEnabled) {
|
||||||
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer);
|
dispatchRelaxedOrderingReturnPtrRegs(relaxedOrderingReturnPtrCmdStream, returnGpuPointer, batchBuffer.hasRelaxedOrderingDependencies);
|
||||||
} else {
|
} else {
|
||||||
setReturnAddress(returnCmd, returnGpuPointer);
|
setReturnAddress(returnCmd, returnGpuPointer);
|
||||||
}
|
}
|
||||||
@@ -683,7 +682,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
|||||||
}
|
}
|
||||||
// mode 2 does not dispatch any commands
|
// mode 2 does not dispatch any commands
|
||||||
|
|
||||||
if (this->relaxedOrderingEnabled) {
|
if (this->relaxedOrderingEnabled && batchBuffer.hasRelaxedOrderingDependencies) {
|
||||||
dispatchTaskStoreSection(batchBuffer.taskStartAddress);
|
dispatchTaskStoreSection(batchBuffer.taskStartAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -698,7 +697,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
|||||||
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
|
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
|
||||||
}
|
}
|
||||||
|
|
||||||
dispatchSemaphoreSection(currentQueueWorkCount + 1, false);
|
dispatchSemaphoreSection(currentQueueWorkCount + 1);
|
||||||
return currentPosition;
|
return currentPosition;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -708,25 +707,31 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
|
|||||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||||
|
|
||||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
|
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
|
||||||
dispatchSemaphoreSection(currentQueueWorkCount, false);
|
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||||
|
|
||||||
// patch conditional bb_start with current GPU address
|
// patch conditional bb_start with current GPU address
|
||||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
||||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||||
|
|
||||||
|
relaxedOrderingSchedulerRequired = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
|
size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrderingQueueStall() {
|
||||||
return getSizeSemaphoreSection(false) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
|
return getSizeSemaphoreSection(true) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) +
|
||||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily, typename Dispatcher>
|
template <typename GfxFamily, typename Dispatcher>
|
||||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
|
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr, bool hasRelaxedOrderingDependencies) {
|
||||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
||||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
||||||
|
|
||||||
uint64_t returnPtrAfterTaskStoreSection = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
|
||||||
|
|
||||||
|
if (hasRelaxedOrderingDependencies) {
|
||||||
|
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||||
|
}
|
||||||
|
|
||||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
||||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
||||||
@@ -846,24 +851,32 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
|||||||
|
|
||||||
this->startRingBuffer();
|
this->startRingBuffer();
|
||||||
|
|
||||||
size_t dispatchSize = getSizeDispatch();
|
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
|
||||||
|
|
||||||
|
size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded);
|
||||||
size_t cycleSize = getSizeSwitchRingBufferSection();
|
size_t cycleSize = getSizeSwitchRingBufferSection();
|
||||||
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd();
|
size_t requiredMinimalSize = dispatchSize + cycleSize + getSizeEnd(relaxedOrderingSchedulerWillBeNeeded);
|
||||||
if (this->relaxedOrderingEnabled) {
|
if (this->relaxedOrderingEnabled) {
|
||||||
if (batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
|
requiredMinimalSize += +RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
|
||||||
|
|
||||||
|
if (batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
|
||||||
requiredMinimalSize += getSizeDispatchRelaxedOrderingQueueStall();
|
requiredMinimalSize += getSizeDispatchRelaxedOrderingQueueStall();
|
||||||
}
|
}
|
||||||
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>() + RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
|
if (batchBuffer.hasRelaxedOrderingDependencies) {
|
||||||
|
requiredMinimalSize += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
|
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
|
||||||
switchRingBuffers();
|
switchRingBuffers();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && !firstSubmissionAfterRingStart) {
|
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
|
||||||
dispatchRelaxedOrderingQueueStall();
|
dispatchRelaxedOrderingQueueStall();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this->relaxedOrderingSchedulerRequired |= batchBuffer.hasRelaxedOrderingDependencies;
|
||||||
|
|
||||||
handleNewResourcesSubmission();
|
handleNewResourcesSubmission();
|
||||||
|
|
||||||
void *currentPosition = dispatchWorkloadSection(batchBuffer);
|
void *currentPosition = dispatchWorkloadSection(batchBuffer);
|
||||||
@@ -890,8 +903,6 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
|||||||
uint64_t flushValue = updateTagValue();
|
uint64_t flushValue = updateTagValue();
|
||||||
flushStamp.setStamp(flushValue);
|
flushStamp.setStamp(flushValue);
|
||||||
|
|
||||||
firstSubmissionAfterRingStart = false;
|
|
||||||
|
|
||||||
return ringStart;
|
return ringStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,8 @@ struct BatchBufferHelper {
|
|||||||
stream, // stream
|
stream, // stream
|
||||||
nullptr, // endCmdPtr
|
nullptr, // endCmdPtr
|
||||||
false, // useSingleSubdevice
|
false, // useSingleSubdevice
|
||||||
false // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false // hasRelaxedOrderingDependencies
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -43,7 +43,8 @@ struct DispatchFlagsHelper {
|
|||||||
false, // areMultipleSubDevicesInContext
|
false, // areMultipleSubDevicesInContext
|
||||||
false, // memoryMigrationRequired
|
false, // memoryMigrationRequired
|
||||||
false, // textureCacheFlush
|
false, // textureCacheFlush
|
||||||
false // hasStallingCmds
|
false, // hasStallingCmds
|
||||||
|
false // hasRelaxedOrderingDependencies
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
using BaseClass::dispatchDisablePrefetcher;
|
using BaseClass::dispatchDisablePrefetcher;
|
||||||
using BaseClass::dispatchPartitionRegisterConfiguration;
|
using BaseClass::dispatchPartitionRegisterConfiguration;
|
||||||
using BaseClass::dispatchPrefetchMitigation;
|
using BaseClass::dispatchPrefetchMitigation;
|
||||||
|
using BaseClass::dispatchRelaxedOrderingReturnPtrRegs;
|
||||||
using BaseClass::dispatchSemaphoreSection;
|
using BaseClass::dispatchSemaphoreSection;
|
||||||
using BaseClass::dispatchStartSection;
|
using BaseClass::dispatchStartSection;
|
||||||
using BaseClass::dispatchSwitchRingBufferSection;
|
using BaseClass::dispatchSwitchRingBufferSection;
|
||||||
@@ -39,6 +40,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
using BaseClass::getDiagnosticModeSection;
|
using BaseClass::getDiagnosticModeSection;
|
||||||
using BaseClass::getSizeDisablePrefetcher;
|
using BaseClass::getSizeDisablePrefetcher;
|
||||||
using BaseClass::getSizeDispatch;
|
using BaseClass::getSizeDispatch;
|
||||||
|
using BaseClass::getSizeDispatchRelaxedOrderingQueueStall;
|
||||||
using BaseClass::getSizeEnd;
|
using BaseClass::getSizeEnd;
|
||||||
using BaseClass::getSizePartitionRegisterConfigurationSection;
|
using BaseClass::getSizePartitionRegisterConfigurationSection;
|
||||||
using BaseClass::getSizePrefetchMitigation;
|
using BaseClass::getSizePrefetchMitigation;
|
||||||
@@ -47,6 +49,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
using BaseClass::getSizeSwitchRingBufferSection;
|
using BaseClass::getSizeSwitchRingBufferSection;
|
||||||
using BaseClass::getSizeSystemMemoryFenceAddress;
|
using BaseClass::getSizeSystemMemoryFenceAddress;
|
||||||
using BaseClass::hwInfo;
|
using BaseClass::hwInfo;
|
||||||
|
using BaseClass::isDisablePrefetcherRequired;
|
||||||
using BaseClass::miMemFenceRequired;
|
using BaseClass::miMemFenceRequired;
|
||||||
using BaseClass::osContext;
|
using BaseClass::osContext;
|
||||||
using BaseClass::partitionConfigSet;
|
using BaseClass::partitionConfigSet;
|
||||||
@@ -57,6 +60,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
using BaseClass::preinitializedTaskStoreSection;
|
using BaseClass::preinitializedTaskStoreSection;
|
||||||
using BaseClass::relaxedOrderingInitialized;
|
using BaseClass::relaxedOrderingInitialized;
|
||||||
using BaseClass::relaxedOrderingSchedulerAllocation;
|
using BaseClass::relaxedOrderingSchedulerAllocation;
|
||||||
|
using BaseClass::relaxedOrderingSchedulerRequired;
|
||||||
using BaseClass::reserved;
|
using BaseClass::reserved;
|
||||||
using BaseClass::ringBuffers;
|
using BaseClass::ringBuffers;
|
||||||
using BaseClass::ringCommandStream;
|
using BaseClass::ringCommandStream;
|
||||||
@@ -98,6 +102,21 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
BaseClass::dispatchStaticRelaxedOrderingScheduler();
|
BaseClass::dispatchStaticRelaxedOrderingScheduler();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void dispatchRelaxedOrderingSchedulerSection(uint32_t value) override {
|
||||||
|
dispatchRelaxedOrderingSchedulerSectionCalled++;
|
||||||
|
BaseClass::dispatchRelaxedOrderingSchedulerSection(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void dispatchRelaxedOrderingQueueStall() override {
|
||||||
|
dispatchRelaxedOrderingQueueStallCalled++;
|
||||||
|
BaseClass::dispatchRelaxedOrderingQueueStall();
|
||||||
|
}
|
||||||
|
|
||||||
|
void dispatchTaskStoreSection(uint64_t taskStartSectionVa) override {
|
||||||
|
dispatchTaskStoreSectionCalled++;
|
||||||
|
BaseClass::dispatchTaskStoreSection(taskStartSectionVa);
|
||||||
|
}
|
||||||
|
|
||||||
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
|
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
|
||||||
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
|
makeResourcesResidentVectorSize = static_cast<uint32_t>(allocations.size());
|
||||||
if (callBaseResident) {
|
if (callBaseResident) {
|
||||||
@@ -155,6 +174,9 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
|||||||
uint32_t disabledDiagnosticCalled = 0u;
|
uint32_t disabledDiagnosticCalled = 0u;
|
||||||
uint32_t preinitializeRelaxedOrderingSectionsCalled = 0;
|
uint32_t preinitializeRelaxedOrderingSectionsCalled = 0;
|
||||||
uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0;
|
uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0;
|
||||||
|
uint32_t dispatchRelaxedOrderingSchedulerSectionCalled = 0;
|
||||||
|
uint32_t dispatchRelaxedOrderingQueueStallCalled = 0;
|
||||||
|
uint32_t dispatchTaskStoreSectionCalled = 0;
|
||||||
uint32_t makeResourcesResidentVectorSize = 0u;
|
uint32_t makeResourcesResidentVectorSize = 0u;
|
||||||
bool allocateOsResourcesReturn = true;
|
bool allocateOsResourcesReturn = true;
|
||||||
bool submitReturn = true;
|
bool submitReturn = true;
|
||||||
|
|||||||
@@ -592,7 +592,9 @@ HWTEST_F(AubFileStreamTests, givenAubCommandStreamReceiverWhenFlushIsCalledThenI
|
|||||||
aubCsr.initializeTagAllocation();
|
aubCsr.initializeTagAllocation();
|
||||||
|
|
||||||
LinearStream cs(commandBuffer);
|
LinearStream cs(commandBuffer);
|
||||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||||
|
batchBuffer.startOffset = 1;
|
||||||
|
|
||||||
ResidencyContainer allocationsForResidency;
|
ResidencyContainer allocationsForResidency;
|
||||||
|
|
||||||
aubCsr.flush(batchBuffer, allocationsForResidency);
|
aubCsr.flush(batchBuffer, allocationsForResidency);
|
||||||
|
|||||||
@@ -95,6 +95,6 @@ struct ComputeModeRequirements : public ::testing::Test {
|
|||||||
|
|
||||||
CommandStreamReceiver *csr = nullptr;
|
CommandStreamReceiver *csr = nullptr;
|
||||||
std::unique_ptr<MockDevice> device;
|
std::unique_ptr<MockDevice> device;
|
||||||
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
DispatchFlags flags{{}, nullptr, {}, nullptr, QueueThrottle::MEDIUM, PreemptionMode::Disabled, GrfConfig::DefaultGrfNumber, L3CachingSettings::l3CacheOn, ThreadArbitrationPolicy::NotPresent, AdditionalKernelExecInfo::NotApplicable, KernelExecutionType::NotApplicable, MemoryCompressionState::NotApplicable, QueueSliceCount::defaultSliceCount, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false};
|
||||||
GraphicsAllocation *alloc = nullptr;
|
GraphicsAllocation *alloc = nullptr;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -467,7 +467,9 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverWhenFlushIsCalledTh
|
|||||||
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
||||||
|
|
||||||
LinearStream cs(commandBuffer);
|
LinearStream cs(commandBuffer);
|
||||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||||
|
batchBuffer.startOffset = 1;
|
||||||
|
|
||||||
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0x1000);
|
MockGraphicsAllocation allocation(reinterpret_cast<void *>(0x1000), 0x1000);
|
||||||
ResidencyContainer allocationsForResidency = {&allocation};
|
ResidencyContainer allocationsForResidency = {&allocation};
|
||||||
|
|
||||||
@@ -493,7 +495,8 @@ HWTEST_F(TbxCommandStreamTests, givenTbxCommandStreamReceiverInBatchedModeWhenFl
|
|||||||
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
auto commandBuffer = pDevice->executionEnvironment->memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{pDevice->getRootDeviceIndex(), MemoryConstants::pageSize, pDevice->getDeviceBitfield()});
|
||||||
|
|
||||||
LinearStream cs(commandBuffer);
|
LinearStream cs(commandBuffer);
|
||||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 1, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||||
|
batchBuffer.startOffset = 1;
|
||||||
ResidencyContainer allocationsForResidency;
|
ResidencyContainer allocationsForResidency;
|
||||||
|
|
||||||
tbxCsr.flush(batchBuffer, allocationsForResidency);
|
tbxCsr.flush(batchBuffer, allocationsForResidency);
|
||||||
|
|||||||
@@ -380,7 +380,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThen
|
|||||||
|
|
||||||
directSubmission.stopRingBuffer();
|
directSubmission.stopRingBuffer();
|
||||||
|
|
||||||
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd();
|
size_t expectedDispatchSize = alreadyDispatchedSize + directSubmission.getSizeEnd(false);
|
||||||
EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize);
|
EXPECT_LE(directSubmission.ringCommandStream.getUsed(), expectedDispatchSize);
|
||||||
EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize);
|
EXPECT_GE(directSubmission.ringCommandStream.getUsed() + MemoryConstants::cacheLineSize, expectedDispatchSize);
|
||||||
EXPECT_EQ(oldQueueCount + 1, directSubmission.semaphoreData->QueueWorkCount);
|
EXPECT_EQ(oldQueueCount + 1, directSubmission.semaphoreData->QueueWorkCount);
|
||||||
@@ -393,7 +393,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd();
|
size_t regularSizeEnd = regularDirectSubmission.getSizeEnd(false);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
@@ -405,7 +405,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
|
|
||||||
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
||||||
|
|
||||||
size_t disabledSizeEnd = directSubmission.getSizeEnd();
|
size_t disabledSizeEnd = directSubmission.getSizeEnd(false);
|
||||||
EXPECT_EQ(disabledSizeEnd, regularSizeEnd + tagUpdateSize);
|
EXPECT_EQ(disabledSizeEnd, regularSizeEnd + tagUpdateSize);
|
||||||
|
|
||||||
directSubmission.tagValueSetValue = 0x4343123ull;
|
directSubmission.tagValueSetValue = 0x4343123ull;
|
||||||
@@ -441,7 +441,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExp
|
|||||||
bool ret = directSubmission.initialize(false, false);
|
bool ret = directSubmission.initialize(false, false);
|
||||||
EXPECT_TRUE(ret);
|
EXPECT_TRUE(ret);
|
||||||
|
|
||||||
directSubmission.dispatchSemaphoreSection(1u, false);
|
directSubmission.dispatchSemaphoreSection(1u);
|
||||||
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(false), directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(false), directSubmission.ringCommandStream.getUsed());
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -510,7 +510,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec
|
|||||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t actualSize = directSubmission.getSizeDispatch();
|
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -526,7 +526,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t actualSize = directSubmission.getSizeDispatch();
|
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -541,7 +541,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t actualSize = directSubmission.getSizeDispatch();
|
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -554,7 +554,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t actualSize = directSubmission.getSizeDispatch();
|
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -568,7 +568,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t actualSize = directSubmission.getSizeDispatch();
|
size_t actualSize = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -581,7 +581,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetEndSizeThenExpectCorr
|
|||||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||||
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
(Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer()) +
|
||||||
MemoryConstants::cacheLineSize;
|
MemoryConstants::cacheLineSize;
|
||||||
size_t actualSize = directSubmission.getSizeEnd();
|
size_t actualSize = directSubmission.getSizeEnd(false);
|
||||||
EXPECT_EQ(expectedSize, actualSize);
|
EXPECT_EQ(expectedSize, actualSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -872,7 +872,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||||
directSubmission.getSizeSemaphoreSection(false) +
|
directSubmission.getSizeSemaphoreSection(false) +
|
||||||
directSubmission.getDiagnosticModeSection();
|
directSubmission.getDiagnosticModeSection();
|
||||||
expectedSize += expectedExecCount * directSubmission.getSizeDispatch();
|
expectedSize += expectedExecCount * directSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
if (directSubmission.miMemFenceRequired) {
|
if (directSubmission.miMemFenceRequired) {
|
||||||
expectedSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
expectedSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||||
@@ -969,7 +969,7 @@ HWTEST_F(DirectSubmissionTest,
|
|||||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||||
directSubmission.getSizeSemaphoreSection(false);
|
directSubmission.getSizeSemaphoreSection(false);
|
||||||
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection(false);
|
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection(false);
|
||||||
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch());
|
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch(false));
|
||||||
expectedSize += expectedExecCount * expectedDispatch;
|
expectedSize += expectedExecCount * expectedDispatch;
|
||||||
|
|
||||||
if (directSubmission.miMemFenceRequired) {
|
if (directSubmission.miMemFenceRequired) {
|
||||||
|
|||||||
@@ -224,7 +224,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
|||||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||||
|
|
||||||
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||||
EXPECT_TRUE(directSubmission.ringStart);
|
EXPECT_TRUE(directSubmission.ringStart);
|
||||||
|
|
||||||
HardwareParse hwParse;
|
HardwareParse hwParse;
|
||||||
@@ -257,7 +257,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
directSubmission.disableMonitorFence = true;
|
directSubmission.disableMonitorFence = true;
|
||||||
@@ -267,7 +267,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
|
|
||||||
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
size_t tagUpdateSize = Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo);
|
||||||
|
|
||||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
|
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - tagUpdateSize));
|
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - tagUpdateSize));
|
||||||
|
|
||||||
directSubmission.tagValueSetValue = 0x4343123ull;
|
directSubmission.tagValueSetValue = 0x4343123ull;
|
||||||
@@ -306,7 +306,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
@@ -316,7 +316,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
|
|
||||||
size_t flushSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo);
|
size_t flushSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo);
|
||||||
|
|
||||||
size_t disabledSizeDispatch = directSubmission.getSizeDispatch();
|
size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));
|
EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));
|
||||||
|
|
||||||
directSubmission.dispatchWorkloadSection(batchBuffer);
|
directSubmission.dispatchWorkloadSection(batchBuffer);
|
||||||
@@ -354,7 +354,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
@@ -365,7 +365,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
size_t startSize = directSubmission.getSizeStartSection();
|
size_t startSize = directSubmission.getSizeStartSection();
|
||||||
size_t storeDataSize = Dispatcher::getSizeStoreDwordCommand();
|
size_t storeDataSize = Dispatcher::getSizeStoreDwordCommand();
|
||||||
|
|
||||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
|
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize + storeDataSize));
|
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize + storeDataSize));
|
||||||
|
|
||||||
directSubmission.workloadModeOneExpectedValue = 0x40u;
|
directSubmission.workloadModeOneExpectedValue = 0x40u;
|
||||||
@@ -398,7 +398,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch();
|
size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
|
||||||
@@ -408,7 +408,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
|
|
||||||
size_t startSize = directSubmission.getSizeStartSection();
|
size_t startSize = directSubmission.getSizeStartSection();
|
||||||
|
|
||||||
size_t debugSizeDispatch = directSubmission.getSizeDispatch();
|
size_t debugSizeDispatch = directSubmission.getSizeDispatch(false);
|
||||||
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize));
|
EXPECT_EQ(debugSizeDispatch, (regularSizeDispatch - startSize));
|
||||||
|
|
||||||
directSubmission.currentQueueWorkCount = 0x40u;
|
directSubmission.currentQueueWorkCount = 0x40u;
|
||||||
@@ -463,7 +463,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||||
|
|
||||||
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(sizeUsed + directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||||
EXPECT_TRUE(directSubmission.ringStart);
|
EXPECT_TRUE(directSubmission.ringStart);
|
||||||
|
|
||||||
HardwareParse hwParse;
|
HardwareParse hwParse;
|
||||||
@@ -502,7 +502,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress);
|
EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress);
|
||||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||||
|
|
||||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
|
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
||||||
EXPECT_TRUE(directSubmission.ringStart);
|
EXPECT_TRUE(directSubmission.ringStart);
|
||||||
@@ -542,7 +542,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||||
|
|
||||||
EXPECT_EQ(directSubmission.getSizeDispatch(), directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(directSubmission.getSizeDispatch(false), directSubmission.ringCommandStream.getUsed());
|
||||||
EXPECT_TRUE(directSubmission.ringStart);
|
EXPECT_TRUE(directSubmission.ringStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -576,7 +576,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
|||||||
EXPECT_EQ(submitSize, directSubmission.submitSize);
|
EXPECT_EQ(submitSize, directSubmission.submitSize);
|
||||||
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
EXPECT_EQ(2u, directSubmission.handleResidencyCount);
|
||||||
|
|
||||||
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch();
|
size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(false);
|
||||||
|
|
||||||
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed());
|
||||||
EXPECT_TRUE(directSubmission.ringStart);
|
EXPECT_TRUE(directSubmission.ringStart);
|
||||||
@@ -949,6 +949,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
|
|||||||
bool verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate);
|
bool verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate);
|
||||||
|
|
||||||
DebugManagerStateRestore restore;
|
DebugManagerStateRestore restore;
|
||||||
|
FlushStampTracker flushStamp{true};
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename FamilyType>
|
template <typename FamilyType>
|
||||||
@@ -1668,7 +1669,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat
|
|||||||
directSubmission.startRingBuffer();
|
directSubmission.startRingBuffer();
|
||||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||||
}
|
}
|
||||||
@@ -1773,7 +1773,7 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
|
|||||||
directSubmission.initialize(true, false);
|
directSubmission.initialize(true, false);
|
||||||
auto offset = directSubmission.ringCommandStream.getUsed() + directSubmission.getSizeStartSection() + RelaxedOrderingHelper::getSizeReturnPtrRegs<FamilyType>();
|
auto offset = directSubmission.ringCommandStream.getUsed() + directSubmission.getSizeStartSection() + RelaxedOrderingHelper::getSizeReturnPtrRegs<FamilyType>();
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
auto taskStoreSection = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
auto taskStoreSection = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||||
@@ -1840,13 +1840,12 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, givenNotEnoughSpaceForTaskStoreSe
|
|||||||
directSubmission.ringCommandStream.getUsed();
|
directSubmission.ringCommandStream.getUsed();
|
||||||
|
|
||||||
auto sizeToConsume = directSubmission.ringCommandStream.getAvailableSpace() -
|
auto sizeToConsume = directSubmission.ringCommandStream.getAvailableSpace() -
|
||||||
(directSubmission.getSizeDispatch() + directSubmission.getSizeEnd() + directSubmission.getSizeSwitchRingBufferSection());
|
(directSubmission.getSizeDispatch(false) + directSubmission.getSizeEnd(false) + directSubmission.getSizeSwitchRingBufferSection());
|
||||||
|
|
||||||
directSubmission.ringCommandStream.getSpace(sizeToConsume);
|
directSubmission.ringCommandStream.getSpace(sizeToConsume);
|
||||||
|
|
||||||
auto oldAllocation = directSubmission.ringCommandStream.getGraphicsAllocation();
|
auto oldAllocation = directSubmission.ringCommandStream.getGraphicsAllocation();
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
EXPECT_NE(oldAllocation, directSubmission.ringCommandStream.getGraphicsAllocation());
|
EXPECT_NE(oldAllocation, directSubmission.ringCommandStream.getGraphicsAllocation());
|
||||||
@@ -1867,7 +1866,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchS
|
|||||||
|
|
||||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||||
@@ -1896,8 +1895,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
|||||||
|
|
||||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
|
||||||
batchBuffer.hasStallingCmds = false;
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||||
@@ -1909,7 +1908,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
|||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||||
|
|
||||||
@@ -1958,7 +1957,6 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen
|
|||||||
|
|
||||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
|
||||||
batchBuffer.hasStallingCmds = true;
|
batchBuffer.hasStallingCmds = true;
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
@@ -1999,8 +1997,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
|||||||
|
|
||||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||||
|
|
||||||
FlushStampTracker flushStamp(true);
|
|
||||||
batchBuffer.hasStallingCmds = false;
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||||
@@ -2011,7 +2009,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
|||||||
directSubmission.stopRingBuffer();
|
directSubmission.stopRingBuffer();
|
||||||
|
|
||||||
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
auto startAddress = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||||
auto jumpOffset = directSubmission.getSizeSemaphoreSection(false) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
auto jumpOffset = directSubmission.getSizeSemaphoreSection(true) + sizeof(typename FamilyType::MI_LOAD_REGISTER_IMM) +
|
||||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||||
|
|
||||||
@@ -2122,6 +2120,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
|
|||||||
|
|
||||||
auto originalBbStart = *reinterpret_cast<MI_BATCH_BUFFER_START *>(batchBuffer.endCmdPtr);
|
auto originalBbStart = *reinterpret_cast<MI_BATCH_BUFFER_START *>(batchBuffer.endCmdPtr);
|
||||||
|
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
directSubmission.dispatchWorkloadSection(batchBuffer);
|
directSubmission.dispatchWorkloadSection(batchBuffer);
|
||||||
|
|
||||||
uint64_t returnPtr = directSubmission.ringCommandStream.getGpuBase() + offset + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + directSubmission.getSizeStartSection();
|
uint64_t returnPtr = directSubmission.ringCommandStream.getGpuBase() + offset + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + directSubmission.getSizeStartSection();
|
||||||
@@ -2135,4 +2134,412 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
|
|||||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
||||||
|
|
||||||
EXPECT_EQ(0, memcmp(&originalBbStart, batchBuffer.endCmdPtr, sizeof(MI_BATCH_BUFFER_START)));
|
EXPECT_EQ(0, memcmp(&originalBbStart, batchBuffer.endCmdPtr, sizeof(MI_BATCH_BUFFER_START)));
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(3u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
directSubmission.stopRingBuffer();
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(3u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
directSubmission.stopRingBuffer();
|
||||||
|
EXPECT_EQ(2u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
directSubmission.stopRingBuffer();
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithNonStallingCmdsAndWithoutDependenciesWhenDispatchingNextCmdBufferThenProgramSchedulerIfNeeded, IsAtLeastXeHpcCore) {
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = true;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(1u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = true;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
batchBuffer.hasStallingCmds = false;
|
||||||
|
batchBuffer.hasRelaxedOrderingDependencies = false;
|
||||||
|
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||||
|
|
||||||
|
directSubmission.stopRingBuffer();
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingSchedulerSectionCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchRelaxedOrderingQueueStallCalled);
|
||||||
|
EXPECT_EQ(0u, directSubmission.dispatchTaskStoreSectionCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verifies the sizes reported by getSizeSemaphoreSection() and getSizeEnd() for both values
// of their boolean argument:
//  - getSizeSemaphoreSection(true) is expected to be the base size plus
//    RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::totalSize,
//    while getSizeSemaphoreSection(false) is the base size plus a single MI_SEMAPHORE_WAIT size.
//  - getSizeEnd(true) is expected to exceed getSizeEnd(false) by exactly
//    getSizeDispatchRelaxedOrderingQueueStall().
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenRelaxedOrderingSchedulerRequiredWhenAskingForCmdsSizeThenReturnCorrectValue, IsAtLeastXeHpcCore) {
    using Dispatcher = RenderDispatcher<FamilyType>;

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);

    // Part of the semaphore section that is common to both variants.
    size_t semaphoreBaseSize = directSubmission.getSizePrefetchMitigation();

    if (directSubmission.isDisablePrefetcherRequired) {
        // The disable-prefetcher command size is counted twice in the expected total.
        semaphoreBaseSize += 2 * directSubmission.getSizeDisablePrefetcher();
    }

    if (directSubmission.miMemFenceRequired) {
        semaphoreBaseSize += MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronizationForDirectSubmission(pDevice->getHardwareInfo());
    }

    EXPECT_EQ(semaphoreBaseSize + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::totalSize, directSubmission.getSizeSemaphoreSection(true));
    EXPECT_EQ(semaphoreBaseSize + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait(), directSubmission.getSizeSemaphoreSection(false));

    // Part of the end section that is common to both variants, built from its individual pieces.
    const auto stopCmdSize = Dispatcher::getSizeStopCommandBuffer();
    const auto cacheFlushSize = Dispatcher::getSizeCacheFlush(pDevice->getHardwareInfo());
    const auto startMinusStopSize = Dispatcher::getSizeStartCommandBuffer() - Dispatcher::getSizeStopCommandBuffer();

    size_t endBaseSize = stopCmdSize + cacheFlushSize + startMinusStopSize + MemoryConstants::cacheLineSize;

    if (directSubmission.disableMonitorFence) {
        endBaseSize += Dispatcher::getSizeMonitorFence(pDevice->getHardwareInfo());
    }

    EXPECT_EQ(endBaseSize + directSubmission.getSizeDispatchRelaxedOrderingQueueStall(), directSubmission.getSizeEnd(true));
    EXPECT_EQ(endBaseSize, directSubmission.getSizeEnd(false));
}
|
||||||
|
|
||||||
|
// Verifies the MI_LOAD_REGISTER_IMM commands emitted by dispatchRelaxedOrderingReturnPtrRegs().
// In both cases CS_GPR_R4 (low/high dword) is expected to hold returnPtr. When the last argument
// is true, CS_GPR_R3 is expected to hold returnPtr advanced by
// RelaxedOrderingHelper::getSizeTaskStoreSection<FamilyType>(); when it is false, CS_GPR_R3 is
// expected to hold returnPtr unchanged.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenSchedulerRequiredWhenDispatchingReturnPtrsThenAddOffset, IsAtLeastXeHpcCore) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.initialize(true, false);

    uint64_t returnPtr = 0x800100123000;
    uint64_t returnPtrWithOffset = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<FamilyType>();

    // Dispatches the return-pointer registers at the current end of the ring buffer and checks
    // the four LRI commands written there (R4 low, R4 high, R3 low, R3 high).
    auto dispatchAndVerify = [&](bool lastDispatchArg, uint64_t expectedR3Value) {
        size_t startOffset = directSubmission.ringCommandStream.getUsed();

        directSubmission.dispatchRelaxedOrderingReturnPtrRegs(directSubmission.ringCommandStream, returnPtr, lastDispatchArg);

        auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(directSubmission.ringCommandStream.getCpuBase(), startOffset));

        EXPECT_TRUE(verifyLri<FamilyType>(lriCmd, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL)));
        ++lriCmd;
        EXPECT_TRUE(verifyLri<FamilyType>(lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32)));

        ++lriCmd;
        EXPECT_TRUE(verifyLri<FamilyType>(lriCmd, CS_GPR_R3, static_cast<uint32_t>(expectedR3Value & 0xFFFF'FFFFULL)));
        ++lriCmd;
        EXPECT_TRUE(verifyLri<FamilyType>(lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(expectedR3Value >> 32)));
    };

    // true: R3 must point past the task store section.
    dispatchAndVerify(true, returnPtrWithOffset);

    // false: R3 must equal the plain return pointer.
    dispatchAndVerify(false, returnPtr);
}
|
}
|
||||||
@@ -288,7 +288,8 @@ TEST_F(WddmCommandStreamTest, GivenOffsetWhenFlushingThenFlushIsSubmittedCorrect
|
|||||||
ASSERT_NE(nullptr, commandBuffer);
|
ASSERT_NE(nullptr, commandBuffer);
|
||||||
LinearStream cs(commandBuffer);
|
LinearStream cs(commandBuffer);
|
||||||
|
|
||||||
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, nullptr, false, false};
|
BatchBuffer batchBuffer = BatchBufferHelper::createDefaultBatchBuffer(cs.getGraphicsAllocation(), &cs, cs.getUsed());
|
||||||
|
batchBuffer.startOffset = offset;
|
||||||
csr->flush(batchBuffer, csr->getResidencyAllocations());
|
csr->flush(batchBuffer, csr->getResidencyAllocations());
|
||||||
EXPECT_EQ(1u, wddm->submitResult.called);
|
EXPECT_EQ(1u, wddm->submitResult.called);
|
||||||
EXPECT_TRUE(wddm->submitResult.success);
|
EXPECT_TRUE(wddm->submitResult.success);
|
||||||
@@ -1165,7 +1166,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR
|
|||||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||||
Dispatcher::getSizePreemption() +
|
Dispatcher::getSizePreemption() +
|
||||||
directSubmission->getSizeDispatch();
|
directSubmission->getSizeDispatch(false);
|
||||||
|
|
||||||
if (directSubmission->miMemFenceRequired) {
|
if (directSubmission->miMemFenceRequired) {
|
||||||
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
||||||
@@ -1206,7 +1207,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB
|
|||||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||||
Dispatcher::getSizePreemption() +
|
Dispatcher::getSizePreemption() +
|
||||||
directSubmission->getSizeDispatch();
|
directSubmission->getSizeDispatch(false);
|
||||||
|
|
||||||
if (directSubmission->miMemFenceRequired) {
|
if (directSubmission->miMemFenceRequired) {
|
||||||
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
expectedSize += directSubmission->getSizeSystemMemoryFenceAddress();
|
||||||
|
|||||||
Reference in New Issue
Block a user