mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
RelaxedOrdering: Preallocate scheduler to optimize dispatch time
Related-To: NEO-7458 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
5e2efc4013
commit
a7d4162ca2
@ -131,7 +131,7 @@ class DirectSubmissionHw {
|
||||
size_t getSizeDispatchRelaxedOrderingQueueStall();
|
||||
|
||||
void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
||||
MOCKABLE_VIRTUAL void preinitializeTaskStoreSection();
|
||||
MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections();
|
||||
|
||||
void initRelaxedOrderingRegisters();
|
||||
|
||||
@ -146,6 +146,8 @@ class DirectSubmissionHw {
|
||||
void dispatchDisablePrefetcher(bool disable);
|
||||
size_t getSizeDisablePrefetcher();
|
||||
|
||||
MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler();
|
||||
|
||||
size_t getSizeEnd();
|
||||
|
||||
void dispatchPartitionRegisterConfiguration();
|
||||
@ -174,6 +176,7 @@ class DirectSubmissionHw {
|
||||
};
|
||||
std::vector<RingBufferUse> ringBuffers;
|
||||
std::unique_ptr<uint8_t[]> preinitializedTaskStoreSection;
|
||||
std::unique_ptr<uint8_t[]> preinitializedRelaxedOrderingScheduler;
|
||||
uint32_t currentRingBuffer = 0u;
|
||||
uint32_t previousRingBuffer = 0u;
|
||||
uint32_t maxRingBufferCount = std::numeric_limits<uint32_t>::max();
|
||||
@ -196,6 +199,7 @@ class DirectSubmissionHw {
|
||||
GraphicsAllocation *semaphores = nullptr;
|
||||
GraphicsAllocation *workPartitionAllocation = nullptr;
|
||||
GraphicsAllocation *deferredTasksListAllocation = nullptr;
|
||||
GraphicsAllocation *relaxedOrderingSchedulerAllocation = nullptr;
|
||||
void *semaphorePtr = nullptr;
|
||||
volatile RingSemaphoreData *semaphoreData = nullptr;
|
||||
volatile void *workloadModeOneStoreAddress = nullptr;
|
||||
|
@ -81,38 +81,40 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingSchedulerSection(uint32_t value) {
|
||||
uint64_t schedulerStartAddress = ringCommandStream.getCurrentGpuAddressPosition();
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingScheduler() {
|
||||
LinearStream schedulerCmdStream(this->relaxedOrderingSchedulerAllocation);
|
||||
uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase();
|
||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||
|
||||
// 1. Init section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionJumpStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R6 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R6 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 10> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2);
|
||||
@ -126,27 +128,28 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_RD);
|
||||
|
||||
aluHelper.copyToCmdStream(ringCommandStream);
|
||||
aluHelper.copyToCmdStream(schedulerCmdStream);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&ringCommandStream, 0, false, true, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false);
|
||||
}
|
||||
|
||||
// 3. Remove task section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(ringCommandStream, AluRegisters::R_1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(ringCommandStream, AluRegisters::R_2);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::R_1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::R_2);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionJumpStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 14> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1);
|
||||
@ -164,38 +167,79 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_WR);
|
||||
|
||||
aluHelper.copyToCmdStream(ringCommandStream);
|
||||
aluHelper.copyToCmdStream(schedulerCmdStream);
|
||||
}
|
||||
|
||||
// 4. List loop check section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(ringCommandStream, AluRegisters::R_2);
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(schedulerCmdStream, AluRegisters::R_2);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
{
|
||||
*ringCommandStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
||||
*schedulerCmdStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::endSectionStart,
|
||||
semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false);
|
||||
// Exit Static scheduler
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart, false, false, false);
|
||||
// 6. Jump to scheduler loop check section (dynamic scheduler)
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false);
|
||||
|
||||
// 7. Jump to Semaphore section (dynamic scheduler)
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionSize), true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 4> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_ADD);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU);
|
||||
aluHelper.copyToCmdStream(schedulerCmdStream);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingSchedulerSection(uint32_t value) {
|
||||
LinearStream schedulerCmdStream(this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
|
||||
// 1. Init section
|
||||
|
||||
uint64_t schedulerStartVa = ringCommandStream.getCurrentGpuAddressPosition();
|
||||
|
||||
uint64_t schedulerLoopCheckVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionStart;
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R9, static_cast<uint32_t>(schedulerLoopCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R9 + 4, static_cast<uint32_t>(schedulerLoopCheckVa >> 32), true);
|
||||
|
||||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||
|
||||
// 2. Scheduler loop check section
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(
|
||||
schedulerCmdStream, schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::endSectionStart,
|
||||
semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||
}
|
||||
|
||||
// 3. Semaphore section
|
||||
@ -203,20 +247,17 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
schedulerCmdStream.getSpace(EncodeMiPredicate<GfxFamily>::getCmdSize()); // skip patching
|
||||
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||
semaphoreGpuVa,
|
||||
value,
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(schedulerCmdStream, semaphoreGpuVa, value,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
|
||||
// 8. End section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
// skip patching End section
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 0, true);
|
||||
}
|
||||
auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
memcpy_s(dst, RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize,
|
||||
this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
@ -269,6 +310,16 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
|
||||
UNRECOVERABLE_IF(deferredTasksListAllocation == nullptr);
|
||||
|
||||
allocations.push_back(deferredTasksListAllocation);
|
||||
|
||||
const AllocationProperties relaxedOrderingSchedulerAllocationProperties(rootDeviceIndex,
|
||||
true, MemoryConstants::pageSize64k,
|
||||
AllocationType::COMMAND_BUFFER,
|
||||
isMultiOsContextCapable, false, osContext.getDeviceBitfield());
|
||||
|
||||
relaxedOrderingSchedulerAllocation = memoryManager->allocateGraphicsMemoryWithProperties(relaxedOrderingSchedulerAllocationProperties);
|
||||
UNRECOVERABLE_IF(relaxedOrderingSchedulerAllocation == nullptr);
|
||||
|
||||
allocations.push_back(relaxedOrderingSchedulerAllocation);
|
||||
}
|
||||
|
||||
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
|
||||
@ -368,9 +419,10 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
||||
this->systemMemoryFenceAddressSet = true;
|
||||
}
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
preinitializeTaskStoreSection();
|
||||
preinitializeRelaxedOrderingSections();
|
||||
|
||||
initRelaxedOrderingRegisters();
|
||||
dispatchStaticRelaxedOrderingScheduler();
|
||||
startBufferSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
||||
|
||||
this->relaxedOrderingInitialized = true;
|
||||
@ -422,7 +474,8 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
}
|
||||
|
||||
if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) {
|
||||
preinitializeTaskStoreSection();
|
||||
preinitializeRelaxedOrderingSections();
|
||||
dispatchStaticRelaxedOrderingScheduler();
|
||||
initRelaxedOrderingRegisters();
|
||||
|
||||
this->relaxedOrderingInitialized = true;
|
||||
@ -498,7 +551,7 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool firstSubmission) {
|
||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||
semaphoreSize += getSizePrefetchMitigation();
|
||||
|
||||
@ -688,7 +741,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::initRelaxedOrderingRegisters() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeTaskStoreSection() {
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSections() {
|
||||
// Task store section
|
||||
preinitializedTaskStoreSection = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
|
||||
|
||||
LinearStream stream(preinitializedTaskStoreSection.get(), RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
|
||||
@ -723,6 +777,46 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeTaskStoreSection()
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(stream, AluRegisters::R_1);
|
||||
|
||||
UNRECOVERABLE_IF(stream.getUsed() != RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
|
||||
|
||||
// Scheduler section
|
||||
preinitializedRelaxedOrderingScheduler = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
LinearStream schedulerStream(preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
|
||||
uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
|
||||
// 1. Init section
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R9, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R9 + 4, 0, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false);
|
||||
|
||||
// 2. Scheduler loop check section
|
||||
{
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(schedulerStream, 0, 0, 0, CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
false, false, false);
|
||||
}
|
||||
|
||||
// 3. Semaphore section
|
||||
{
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
|
||||
// 4. End section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R5, 0, true);
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
@ -889,6 +983,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::deallocateResources() {
|
||||
}
|
||||
|
||||
memoryManager->freeGraphicsMemory(deferredTasksListAllocation);
|
||||
memoryManager->freeGraphicsMemory(relaxedOrderingSchedulerAllocation);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
@ -32,10 +32,9 @@ constexpr size_t getSizeReturnPtrRegs() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct SchedulerSizeAndOffsetSection {
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
struct StaticSchedulerSizeAndOffsetSection {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
@ -56,7 +55,24 @@ struct SchedulerSizeAndOffsetSection {
|
||||
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
|
||||
static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
static constexpr uint64_t semaphoreSectionJumpStart = schedulerLoopCheckSectionJumpStart + schedulerLoopCheckSectionJumpSize;
|
||||
static constexpr uint64_t semaphoreSectionJumpSize = EncodeMiPredicate<GfxFamily>::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 4>::getCmdsSize() +
|
||||
sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
static constexpr uint64_t totalSize = semaphoreSectionJumpStart + semaphoreSectionJumpSize;
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct DynamicSchedulerSizeAndOffsetSection {
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
static constexpr uint64_t initSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
static constexpr uint64_t schedulerLoopCheckSectionStart = initSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart() + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
static constexpr uint64_t semaphoreSectionStart = schedulerLoopCheckSectionStart + schedulerLoopCheckSectionSize;
|
||||
|
@ -53,8 +53,10 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::partitionedMode;
|
||||
using BaseClass::performDiagnosticMode;
|
||||
using BaseClass::postSyncOffset;
|
||||
using BaseClass::preinitializedRelaxedOrderingScheduler;
|
||||
using BaseClass::preinitializedTaskStoreSection;
|
||||
using BaseClass::relaxedOrderingInitialized;
|
||||
using BaseClass::relaxedOrderingSchedulerAllocation;
|
||||
using BaseClass::reserved;
|
||||
using BaseClass::ringBuffers;
|
||||
using BaseClass::ringCommandStream;
|
||||
@ -86,9 +88,14 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
return allocateOsResourcesReturn;
|
||||
}
|
||||
|
||||
void preinitializeTaskStoreSection() override {
|
||||
preinitializeTaskStoreSectionCalled++;
|
||||
BaseClass::preinitializeTaskStoreSection();
|
||||
// Mock override used by tests: counts each invocation in
// preinitializeRelaxedOrderingSectionsCalled and then forwards to the
// base-class implementation, so the real pre-initialization still runs.
void preinitializeRelaxedOrderingSections() override {
|
||||
// Bump the call counter before delegating so the call is recorded first.
preinitializeRelaxedOrderingSectionsCalled++;
|
||||
BaseClass::preinitializeRelaxedOrderingSections();
|
||||
}
|
||||
|
||||
// Mock override used by tests: counts each invocation in
// dispatchStaticRelaxedOrderingSchedulerCalled and then forwards to the
// base-class implementation, so the static scheduler is still dispatched.
void dispatchStaticRelaxedOrderingScheduler() override {
|
||||
// Bump the call counter before delegating so the call is recorded first.
dispatchStaticRelaxedOrderingSchedulerCalled++;
|
||||
BaseClass::dispatchStaticRelaxedOrderingScheduler();
|
||||
}
|
||||
|
||||
bool makeResourcesResident(DirectSubmissionAllocations &allocations) override {
|
||||
@ -146,7 +153,8 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
uint32_t submitCount = 0u;
|
||||
uint32_t handleResidencyCount = 0u;
|
||||
uint32_t disabledDiagnosticCalled = 0u;
|
||||
uint32_t preinitializeTaskStoreSectionCalled = 0;
|
||||
uint32_t preinitializeRelaxedOrderingSectionsCalled = 0;
|
||||
uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0;
|
||||
uint32_t makeResourcesResidentVectorSize = 0u;
|
||||
bool allocateOsResourcesReturn = true;
|
||||
bool submitReturn = true;
|
||||
|
@ -913,7 +913,10 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset);
|
||||
bool verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType);
|
||||
@ -1187,7 +1190,343 @@ bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataRegBbStart(void
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset) {
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
|
||||
uint64_t schedulerStartGpuAddress = schedulerAllocation.getGpuAddress();
|
||||
void *schedulerCmds = schedulerAllocation.getUnderlyingBuffer();
|
||||
|
||||
// 1. Init section
|
||||
auto miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(schedulerCmds);
|
||||
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(miPredicate, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionJumpStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::removeTaskSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::tasksListLoopCheckSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++lriCmd);
|
||||
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R6, 8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 9) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++miAluCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, 0, true, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 3. Remove task section
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_1, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(cmds, AluRegisters::R_2, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(cmds, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(cmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionJumpStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 13) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 4. List loop check section
|
||||
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++miAluCmd);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_2, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalRegRegBbStart<FamilyType>(cmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalRegRegBatchBufferStart()));
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
auto arbCheck = reinterpret_cast<MI_ARB_CHECK *>(++lriCmd);
|
||||
if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 6. Jump to scheduler loop check section (dynamic scheduler)
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLrr<FamilyType>(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++lrrCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, 0, true, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 7. Jump to Semaphore section (dynamic scheduler)
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::schedulerLoopCheckSectionSize))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R10 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++miAluCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, 0, true, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
@ -1203,282 +1542,42 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre
|
||||
bool success = false;
|
||||
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
if (auto miPredicate = genCmdCast<MI_SET_PREDICATE *>(it)) {
|
||||
if (auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it)) {
|
||||
// 1. Init section
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
|
||||
uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(lriCmd, cs.getCpuBase());
|
||||
|
||||
uint64_t schedulerLoopCheckVa = schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::schedulerLoopCheckSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R9, static_cast<uint32_t>(schedulerLoopCheckVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(miPredicate, cs.getCpuBase());
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(miPredicate, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R9 + 4, static_cast<uint32_t>(schedulerLoopCheckVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++lriCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, schedulerAllocationGpuVa, false, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
// 2. Scheduler loop check section
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::removeTaskSectionStart;
|
||||
bbStart++;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::tasksListLoopCheckSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++lriCmd);
|
||||
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R6, 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 9) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++miAluCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, 0, true, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3. Remove task section
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_1, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(cmds, AluRegisters::R_2, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(cmds, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 13) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 4. List loop check section
|
||||
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++miAluCmd);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_2, true)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalRegRegBbStart<FamilyType>(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalRegRegBatchBufferStart()));
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
auto arbCheck = reinterpret_cast<MI_ARB_CHECK *>(++lriCmd);
|
||||
if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
auto cmds2 = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
|
||||
if (!verifyConditionalDataMemBbStart<FamilyType>(cmds2, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::endSectionStart,
|
||||
if (!verifyConditionalDataMemBbStart<FamilyType>(bbStart, schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::endSectionStart,
|
||||
semaphoreGpuVa, semaphoreValue, CompareOperation::GreaterOrEqual, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(ptrOffset(cmds2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
|
||||
if (!verifyBbStart<FamilyType>(bbStart, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart, false, false)) {
|
||||
bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(ptrOffset(bbStart, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
|
||||
if (!verifyBbStart<FamilyType>(bbStart, schedulerAllocationGpuVa + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart, false, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 7. Semaphore section
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
// 3. Semaphore section
|
||||
auto miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
@ -1490,7 +1589,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre
|
||||
continue;
|
||||
}
|
||||
|
||||
// 8. End section
|
||||
// 4. End section
|
||||
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++semaphore);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
@ -1512,7 +1611,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre
|
||||
return success;
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreateDeferredTasksAllocation) {
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreateDeferredTasksAndSchedulerAllocation) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
auto mockMemoryOperations = new MockMemoryOperations();
|
||||
@ -1525,12 +1624,57 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreate
|
||||
|
||||
directSubmission.initialize(false, false);
|
||||
|
||||
auto allocsIter = mockMemoryOperations->gfxAllocationsForMakeResident.rbegin();
|
||||
|
||||
EXPECT_EQ(AllocationType::COMMAND_BUFFER, directSubmission.relaxedOrderingSchedulerAllocation->getAllocationType());
|
||||
EXPECT_NE(nullptr, directSubmission.relaxedOrderingSchedulerAllocation);
|
||||
EXPECT_EQ(directSubmission.relaxedOrderingSchedulerAllocation, *allocsIter);
|
||||
|
||||
allocsIter++;
|
||||
|
||||
EXPECT_EQ(AllocationType::DEFERRED_TASKS_LIST, directSubmission.deferredTasksListAllocation->getAllocationType());
|
||||
EXPECT_NE(nullptr, directSubmission.deferredTasksListAllocation);
|
||||
EXPECT_EQ(directSubmission.deferredTasksListAllocation, mockMemoryOperations->gfxAllocationsForMakeResident.back());
|
||||
EXPECT_EQ(directSubmission.deferredTasksListAllocation, *allocsIter);
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSectionAndInitRegs) {
|
||||
// Checks how often the mock records a call to dispatchStaticRelaxedOrderingScheduler
// (via its dispatchStaticRelaxedOrderingSchedulerCalled counter) across three
// initialization scenarios. The test is limited to the IsAtLeastXeHpcCore matcher.
// NOTE(review): the first argument of initialize() presumably requests that the ring
// is started as part of initialization - confirm against DirectSubmissionHw::initialize.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStaticScheduler, IsAtLeastXeHpcCore) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
// Scenario 1: initialize(false, false) alone must leave the counter at zero.
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(false, false);
|
||||
|
||||
EXPECT_EQ(0u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
}
|
||||
|
||||
// Scenario 2: initialize(true, false) is expected to dispatch the static scheduler
// exactly once, and the scheduler allocation contents must pass the
// verifyStaticSchedulerProgramming check.
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(true, false);
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
// The verifier receives the scheduler allocation and the GPU address of the
// deferred tasks list allocation.
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress()));
|
||||
}
|
||||
|
||||
// Scenario 3: after initialize(false, false) the first startRingBuffer() is expected
// to dispatch the static scheduler once; a second startRingBuffer() and a later
// dispatchCommandBuffer() must not increase the counter.
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(false, false);
|
||||
EXPECT_EQ(0u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
|
||||
directSubmission.startRingBuffer();
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
|
||||
// Repeated start: the counter must stay at 1.
directSubmission.startRingBuffer();
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
|
||||
// Dispatching a command buffer afterwards must also leave the counter at 1.
FlushStampTracker flushStamp(true);
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSectionAndStaticSchedulerAndInitRegs) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
@ -1575,9 +1719,10 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize
|
||||
directSubmission.initialize(false, false);
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, 0));
|
||||
|
||||
EXPECT_EQ(0u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(0u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
EXPECT_FALSE(directSubmission.relaxedOrderingInitialized);
|
||||
EXPECT_EQ(nullptr, directSubmission.preinitializedTaskStoreSection.get());
|
||||
EXPECT_EQ(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get());
|
||||
}
|
||||
|
||||
{
|
||||
@ -1585,33 +1730,35 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize
|
||||
directSubmission.initialize(true, false);
|
||||
EXPECT_TRUE(verifyInitRegisters(directSubmission.ringCommandStream, 0));
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
EXPECT_TRUE(directSubmission.relaxedOrderingInitialized);
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get());
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get());
|
||||
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
directSubmission.startRingBuffer();
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset));
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
}
|
||||
|
||||
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(false, false);
|
||||
EXPECT_EQ(0u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(0u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
|
||||
directSubmission.startRingBuffer();
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
EXPECT_TRUE(directSubmission.relaxedOrderingInitialized);
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get());
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get());
|
||||
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
directSubmission.startRingBuffer();
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset));
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1713,23 +1860,23 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchS
|
||||
directSubmission.initialize(true, false);
|
||||
auto offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;
|
||||
|
||||
size_t endOffset = 0;
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
|
||||
offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispatchingThenProgramSchedulerWithR5, IsAtLeastXeHpcCore) {
|
||||
@ -1742,19 +1889,19 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
directSubmission.initialize(true, false);
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;
|
||||
|
||||
size_t endOffset = 0;
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = false;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
|
||||
offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
@ -1787,11 +1934,11 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
|
||||
ASSERT_TRUE(success);
|
||||
offset = ptrDiff(++lriCmd, directSubmission.ringCommandStream.getCpuBase());
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount - 1, offset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount - 1, offset, endOffset));
|
||||
|
||||
EXPECT_TRUE(endOffset > offset);
|
||||
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhenDispatchingThenDontProgramSchedulerWithR5, IsAtLeastXeHpcCore) {
|
||||
@ -1804,12 +1951,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen
|
||||
directSubmission.initialize(true, false);
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;
|
||||
|
||||
size_t endOffset = 0;
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = true;
|
||||
@ -1845,19 +1992,19 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
directSubmission.initialize(true, false);
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;
|
||||
|
||||
size_t endOffset = 0;
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
FlushStampTracker flushStamp(true);
|
||||
batchBuffer.hasStallingCmds = false;
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
|
||||
offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
@ -1889,11 +2036,11 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
|
||||
ASSERT_TRUE(success);
|
||||
offset = ptrDiff(lriCmd, directSubmission.ringCommandStream.getCpuBase());
|
||||
EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
EXPECT_TRUE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset));
|
||||
|
||||
EXPECT_TRUE(endOffset > offset);
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissionThenDontProgramSchedulerWithR5, IsAtLeastXeHpcCore) {
|
||||
@ -1906,12 +2053,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissio
|
||||
directSubmission.initialize(true, false);
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;
|
||||
|
||||
size_t endOffset = 0;
|
||||
|
||||
EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
EXPECT_FALSE(verifyDynamicSchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset));
|
||||
|
||||
directSubmission.stopRingBuffer();
|
||||
|
||||
|
Reference in New Issue
Block a user