mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Relaxed ordering scheduler section for DirectSubmission
Related-To: NEO-7458
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
20cbdf8683
commit
e6c1658bae
@ -110,8 +110,10 @@ class DirectSubmissionHw {
|
||||
|
||||
void cpuCachelineFlush(void *ptr, size_t size);
|
||||
|
||||
void dispatchSemaphoreSection(uint32_t value);
|
||||
size_t getSizeSemaphoreSection();
|
||||
void dispatchSemaphoreSection(uint32_t value, bool firstSubmission);
|
||||
size_t getSizeSemaphoreSection(bool firstSubmission);
|
||||
|
||||
void dispatchRelaxedOrderingSchedulerSection(uint32_t value);
|
||||
|
||||
void dispatchStartSection(uint64_t gpuStartAddress);
|
||||
size_t getSizeStartSection();
|
||||
@ -122,6 +124,8 @@ class DirectSubmissionHw {
|
||||
void dispatchTaskStoreSection(uint64_t taskStartSectionVa);
|
||||
MOCKABLE_VIRTUAL void preinitializeTaskStoreSection();
|
||||
|
||||
void initRelaxedOrderingRegisters();
|
||||
|
||||
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
|
||||
|
||||
void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
|
||||
|
@ -80,6 +80,145 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
|
||||
relaxedOrderingEnabled = (DebugManager.flags.DirectSubmissionRelaxedOrdering.get() == 1);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingSchedulerSection(uint32_t value) {
|
||||
uint64_t schedulerStartAddress = getCommandBufferPositionGpuAddress(ringCommandStream.getSpace(0));
|
||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||
|
||||
// 1. Init section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true);
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R6 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 10> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_SHL);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_ADD);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_RD);
|
||||
|
||||
aluHelper.copyToCmdStream(ringCommandStream);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&ringCommandStream, 0, false, true, false);
|
||||
}
|
||||
|
||||
// 3. Remove task section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(ringCommandStream, AluRegisters::R_1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(ringCommandStream, AluRegisters::R_2);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R7 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 14> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_SHL);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_ADD);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_RD);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_ADD);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_WR);
|
||||
|
||||
aluHelper.copyToCmdStream(ringCommandStream);
|
||||
}
|
||||
|
||||
// 4. List loop check section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeMathMMIO<GfxFamily>::encodeIncrement(ringCommandStream, AluRegisters::R_2);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true);
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
{
|
||||
*ringCommandStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(ringCommandStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
{
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::endSectionStart,
|
||||
semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart, false, false, false);
|
||||
}
|
||||
|
||||
// 7. Semaphore section
|
||||
{
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||
semaphoreGpuVa,
|
||||
value,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
|
||||
// 8. End section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(ringCommandStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 0, true);
|
||||
}
|
||||
}
|
||||
|
||||
// Out-of-line definition of the DirectSubmissionHw destructor; it is explicitly
// defaulted here rather than inside the class body.
template <typename GfxFamily, typename Dispatcher>
|
||||
DirectSubmissionHw<GfxFamily, Dispatcher>::~DirectSubmissionHw() = default;
|
||||
|
||||
@ -213,7 +352,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
||||
initDiagnostic(submitOnInit);
|
||||
if (ret && submitOnInit) {
|
||||
size_t startBufferSize = Dispatcher::getSizePreemption() +
|
||||
getSizeSemaphoreSection();
|
||||
getSizeSemaphoreSection(true);
|
||||
|
||||
Dispatcher::dispatchPreemption(ringCommandStream);
|
||||
if (this->partitionedMode) {
|
||||
@ -230,13 +369,17 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit, bo
|
||||
}
|
||||
if (this->relaxedOrderingEnabled) {
|
||||
preinitializeTaskStoreSection();
|
||||
|
||||
initRelaxedOrderingRegisters();
|
||||
startBufferSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
||||
|
||||
this->relaxedOrderingInitialized = true;
|
||||
}
|
||||
if (workloadMode == 1) {
|
||||
dispatchDiagnosticModeSection();
|
||||
startBufferSize += getDiagnosticModeSection();
|
||||
}
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
||||
|
||||
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
|
||||
performDiagnosticMode();
|
||||
@ -251,13 +394,16 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t startSize = getSizeSemaphoreSection();
|
||||
size_t startSize = getSizeSemaphoreSection(true);
|
||||
if (!this->partitionConfigSet) {
|
||||
startSize += getSizePartitionRegisterConfigurationSection();
|
||||
}
|
||||
if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) {
|
||||
startSize += getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) {
|
||||
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
|
||||
}
|
||||
|
||||
size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd();
|
||||
if (ringCommandStream.getAvailableSpace() < requiredSize) {
|
||||
@ -277,11 +423,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
|
||||
|
||||
if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) {
|
||||
preinitializeTaskStoreSection();
|
||||
initRelaxedOrderingRegisters();
|
||||
|
||||
this->relaxedOrderingInitialized = true;
|
||||
}
|
||||
|
||||
currentQueueWorkCount++;
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount, true);
|
||||
|
||||
ringStart = submit(gpuStartVa, startSize);
|
||||
|
||||
@ -319,15 +467,20 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value) {
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(uint32_t value, bool firstSubmission) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
dispatchDisablePrefetcher(true);
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||
semaphoreGpuVa,
|
||||
value,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
|
||||
if (this->relaxedOrderingEnabled && !firstSubmission) {
|
||||
dispatchRelaxedOrderingSchedulerSection(value);
|
||||
} else {
|
||||
EncodeSempahore<GfxFamily>::addMiSemaphoreWaitCommand(ringCommandStream,
|
||||
semaphoreGpuVa,
|
||||
value,
|
||||
COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD);
|
||||
}
|
||||
|
||||
if (miMemFenceRequired) {
|
||||
MemorySynchronizationCommands<GfxFamily>::addAdditionalSynchronizationForDirectSubmission(ringCommandStream, this->gpuVaForAdditionalSynchronizationWA, true, *hwInfo);
|
||||
@ -338,8 +491,9 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchSemaphoreSection(
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection() {
|
||||
size_t semaphoreSize = EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeSemaphoreSection(bool firstSubmission) {
|
||||
size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<GfxFamily>::totalSize
|
||||
: EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait();
|
||||
semaphoreSize += getSizePrefetchMitigation();
|
||||
|
||||
if (isDisablePrefetcherRequired) {
|
||||
@ -405,7 +559,7 @@ inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::getCommandBufferPosit
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch() {
|
||||
size_t size = getSizeSemaphoreSection();
|
||||
size_t size = getSizeSemaphoreSection(false);
|
||||
if (workloadMode == 0) {
|
||||
size += getSizeStartSection();
|
||||
} else if (workloadMode == 1) {
|
||||
@ -475,10 +629,18 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
this->useNotifyForPostSync, this->partitionedMode, this->dcFlushRequired);
|
||||
}
|
||||
|
||||
dispatchSemaphoreSection(currentQueueWorkCount + 1);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount + 1, false);
|
||||
return currentPosition;
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::initRelaxedOrderingRegisters() {
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R1, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R1 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5 + 4, 0, true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeTaskStoreSection() {
|
||||
preinitializedTaskStoreSection = std::make_unique<uint8_t[]>(RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>());
|
||||
|
@ -21,5 +21,47 @@ constexpr size_t getSizeTaskStoreSection() {
|
||||
EncodeMiPredicate<GfxFamily>::getCmdSize());
|
||||
}
|
||||
|
||||
/**
 * Size in bytes of the register initialization commands for relaxed ordering.
 *
 * @return the size of four MI_LOAD_REGISTER_IMM commands of the given family.
 */
template <typename GfxFamily>
constexpr size_t getSizeRegistersInit() {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    constexpr size_t lriCommandsCount = 4;
    return lriCommandsCount * sizeof(MI_LOAD_REGISTER_IMM);
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
struct SchedulerSizeAndOffsetSection {
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
|
||||
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
|
||||
|
||||
static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) +
|
||||
EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t loopStartSectionStart = initSectionSize;
|
||||
static constexpr uint64_t loopStartSectionSize = (4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 10>::getCmdsSize() + sizeof(MI_BATCH_BUFFER_START) +
|
||||
EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t removeTaskSectionStart = loopStartSectionStart + loopStartSectionSize;
|
||||
static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart() +
|
||||
(4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper<GfxFamily, 14>::getCmdsSize() + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t tasksListLoopCheckSectionStart = removeTaskSectionStart + removeStartSectionSize;
|
||||
static constexpr uint64_t tasksListLoopCheckSectionSize = EncodeMathMMIO<GfxFamily>::getCmdSizeForIncrementOrDecrement() + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalRegRegBatchBufferStart() +
|
||||
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
|
||||
static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionSize = EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataMemBatchBufferStart() + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
||||
static constexpr uint64_t semaphoreSectionStart = schedulerLoopCheckSectionStart + schedulerLoopCheckSectionSize;
|
||||
static constexpr uint64_t semaphoreSectionSize = EncodeSempahore<GfxFamily>::getSizeMiSemaphoreWait() + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t endSectionStart = semaphoreSectionStart + semaphoreSectionSize;
|
||||
static constexpr uint64_t endSectionSize = sizeof(MI_LOAD_REGISTER_IMM) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t totalSize = endSectionStart + endSectionSize;
|
||||
};
|
||||
|
||||
} // namespace RelaxedOrderingHelper
|
||||
} // namespace NEO
|
@ -352,7 +352,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedAnd
|
||||
auto expectedRingBuffer = directSubmission.currentRingBuffer;
|
||||
GraphicsAllocation *oldRingBuffer = directSubmission.ringCommandStream.getGraphicsAllocation();
|
||||
|
||||
auto requiredSize = directSubmission.getSizeSemaphoreSection();
|
||||
auto requiredSize = directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
requiredSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -441,8 +441,8 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExp
|
||||
bool ret = directSubmission.initialize(false, false);
|
||||
EXPECT_TRUE(ret);
|
||||
|
||||
directSubmission.dispatchSemaphoreSection(1u);
|
||||
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed());
|
||||
directSubmission.dispatchSemaphoreSection(1u, false);
|
||||
EXPECT_EQ(directSubmission.getSizeSemaphoreSection(false), directSubmission.ringCommandStream.getUsed());
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchStartSectionThenExpectCorrectSizeUsed) {
|
||||
@ -509,7 +509,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
@ -525,7 +525,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
size_t expectedSize = Dispatcher::getSizeStoreDwordCommand() +
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
@ -540,7 +540,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
directSubmission.workloadMode = 2;
|
||||
size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
@ -553,7 +553,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
directSubmission.disableCacheFlush = true;
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
@ -567,7 +567,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
directSubmission.disableMonitorFence = true;
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t actualSize = directSubmission.getSizeDispatch();
|
||||
EXPECT_EQ(expectedSize, actualSize);
|
||||
}
|
||||
@ -797,7 +797,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
EXPECT_EQ(0u, NEO::IoFunctions::mockVfptrinfCalled);
|
||||
EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled);
|
||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
expectedSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -870,7 +870,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
expectedSemaphoreValue += expectedExecCount;
|
||||
EXPECT_EQ(expectedExecCount, directSubmission.diagnostic->getExecutionsCount());
|
||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection() +
|
||||
directSubmission.getSizeSemaphoreSection(false) +
|
||||
directSubmission.getDiagnosticModeSection();
|
||||
expectedSize += expectedExecCount * directSubmission.getSizeDispatch();
|
||||
|
||||
@ -967,8 +967,8 @@ HWTEST_F(DirectSubmissionTest,
|
||||
expectedSemaphoreValue += expectedExecCount;
|
||||
EXPECT_EQ(expectedExecCount, directSubmission.diagnostic->getExecutionsCount());
|
||||
size_t expectedSize = Dispatcher::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
size_t expectedDispatch = directSubmission.getSizeSemaphoreSection(false);
|
||||
EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch());
|
||||
expectedSize += expectedExecCount * expectedDispatch;
|
||||
|
||||
|
@ -5,11 +5,13 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_container/implicit_scaling.h"
|
||||
#include "shared/source/command_stream/submissions_aggregator.h"
|
||||
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||
#include "shared/source/direct_submission/direct_submission_hw.h"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
|
||||
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/utilities/cpuintrinsics.h"
|
||||
@ -199,7 +201,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.currentQueueWorkCount);
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection() +
|
||||
directSubmission.getSizeSemaphoreSection(false) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(MI_LOAD_REGISTER_MEM);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
@ -444,7 +446,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.currentQueueWorkCount);
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
submitSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -492,7 +494,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(2u, directSubmission.semaphoreData->QueueWorkCount);
|
||||
EXPECT_EQ(3u, directSubmission.currentQueueWorkCount);
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
size_t submitSize = directSubmission.getSizeSemaphoreSection();
|
||||
size_t submitSize = directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
submitSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -521,7 +523,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(1u, directSubmission.currentQueueWorkCount);
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection();
|
||||
directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
submitSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -567,7 +569,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_EQ(2u, directSubmission.semaphoreData->QueueWorkCount);
|
||||
EXPECT_EQ(3u, directSubmission.currentQueueWorkCount);
|
||||
EXPECT_EQ(1u, directSubmission.submitCount);
|
||||
size_t submitSize = directSubmission.getSizeSemaphoreSection();
|
||||
size_t submitSize = directSubmission.getSizeSemaphoreSection(false);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
submitSize += directSubmission.getSizeSystemMemoryFenceAddress();
|
||||
}
|
||||
@ -631,7 +633,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
||||
EXPECT_NE(0x0u, directSubmission.ringCommandStream.getUsed());
|
||||
|
||||
size_t submitSize = RenderDispatcher<FamilyType>::getSizePreemption() +
|
||||
directSubmission.getSizeSemaphoreSection() +
|
||||
directSubmission.getSizeSemaphoreSection(false) +
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(MI_LOAD_REGISTER_MEM);
|
||||
if (directSubmission.miMemFenceRequired) {
|
||||
@ -910,9 +912,604 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
|
||||
DirectSubmissionDispatchBufferTest::SetUp();
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyAlu(typename FamilyType::MI_MATH_ALU_INST_INLINE *miAluCmd, AluRegisters opcode, AluRegisters operand1, AluRegisters operand2);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyLri(typename FamilyType::MI_LOAD_REGISTER_IMM *lriCmd, uint32_t registerOffset, uint32_t data);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyLrr(typename FamilyType::MI_LOAD_REGISTER_REG *lrrCmd, uint32_t dstOffset, uint32_t srcOffset);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyIncrementOrDecrement(void *cmds, AluRegisters aluRegister, bool increment);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalDataRegBbStart(void *cmd, uint64_t startAddress, uint32_t compareReg, uint32_t compareData, CompareOperation compareOperation, bool indirect);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint32_t compareData, CompareOperation compareOperation, bool indirect);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyConditionalRegRegBbStart(void *cmd, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyBaseConditionalBbStart(void *cmd, CompareOperation compareOperation, uint64_t startAddress, bool indirect, AluRegisters regA, AluRegisters regB);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *cmd, uint64_t startAddress, bool indirect, bool predicate);
|
||||
|
||||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType) {
|
||||
if constexpr (FamilyType::isUsingMiSetPredicate) {
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
using PREDICATE_ENABLE = typename MI_SET_PREDICATE::PREDICATE_ENABLE;
|
||||
|
||||
auto miSetPredicate = reinterpret_cast<MI_SET_PREDICATE *>(miPredicateCmd);
|
||||
if (static_cast<PREDICATE_ENABLE>(predicateType) != miSetPredicate->getPredicateEnable()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyLri(typename FamilyType::MI_LOAD_REGISTER_IMM *lriCmd, uint32_t registerOffset, uint32_t data) {
|
||||
if ((lriCmd->getRegisterOffset() != registerOffset) || (lriCmd->getDataDword() != data)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyLrr(typename FamilyType::MI_LOAD_REGISTER_REG *lrrCmd, uint32_t dstOffset, uint32_t srcOffset) {
|
||||
if ((dstOffset != lrrCmd->getDestinationRegisterAddress()) || (srcOffset != lrrCmd->getSourceRegisterAddress())) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyIncrementOrDecrement(void *cmds, AluRegisters aluRegister, bool increment) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(cmds);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, aluRegister)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
|
||||
if (increment && !verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!increment && !verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_SUB, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_STORE, aluRegister, AluRegisters::R_ACCU)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyAlu(typename FamilyType::MI_MATH_ALU_INST_INLINE *miAluCmd, AluRegisters opcode, AluRegisters operand1, AluRegisters operand2) {
|
||||
if ((static_cast<uint32_t>(opcode) != miAluCmd->DW0.BitField.ALUOpcode) ||
|
||||
(static_cast<uint32_t>(operand1) != miAluCmd->DW0.BitField.Operand1) ||
|
||||
(static_cast<uint32_t>(operand2) != miAluCmd->DW0.BitField.Operand2)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyBbStart(typename FamilyType::MI_BATCH_BUFFER_START *bbStartCmd, uint64_t startAddress, bool indirect, bool predicate) {
|
||||
if constexpr (FamilyType::isUsingMiSetPredicate) {
|
||||
if ((predicate != !!bbStartCmd->getPredicationEnable()) ||
|
||||
(indirect != !!bbStartCmd->getIndirectAddressEnable())) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!indirect && startAddress != bbStartCmd->getBatchBufferStartAddress()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyBaseConditionalBbStart(void *cmd, CompareOperation compareOperation, uint64_t startAddress, bool indirect, AluRegisters regA, AluRegisters regB) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(cmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 3) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, regA)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, regB)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_SUB, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
miAluCmd++;
|
||||
|
||||
if (compareOperation == CompareOperation::Equal || compareOperation == CompareOperation::NotEqual) {
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ZF)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_CF)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++miAluCmd);
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_PREDICATE_RESULT_2, CS_GPR_R7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto predicateCmd = reinterpret_cast<MI_SET_PREDICATE *>(++lrrCmd);
|
||||
if (compareOperation == CompareOperation::Equal) {
|
||||
if (!verifyMiPredicate<FamilyType>(predicateCmd, MiPredicateType::NoopOnResult2Clear)) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if (!verifyMiPredicate<FamilyType>(predicateCmd, MiPredicateType::NoopOnResult2Set)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto bbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(++predicateCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStartCmd, startAddress, indirect, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
predicateCmd = reinterpret_cast<MI_SET_PREDICATE *>(++bbStartCmd);
|
||||
if (!verifyMiPredicate<FamilyType>(predicateCmd, MiPredicateType::Disable)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyConditionalRegRegBbStart(void *cmd, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect) {
|
||||
return verifyBaseConditionalBbStart<FamilyType>(cmd, compareOperation, startAddress, indirect, compareReg0, compareReg1);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t compareAddress, uint32_t compareData, CompareOperation compareOperation, bool indirect) {
|
||||
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(cmd);
|
||||
if ((lrmCmd->getRegisterAddress() != CS_GPR_R7) || (lrmCmd->getMemoryAddress() != compareAddress)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, compareData)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return verifyBaseConditionalBbStart<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataRegBbStart(void *cmds, uint64_t startAddress, uint32_t compareReg, uint32_t compareData,
|
||||
CompareOperation compareOperation, bool indirect) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(cmds);
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R7, compareReg)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8, compareData)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return verifyBaseConditionalBbStart<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cs, offset);
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
bool success = false;
|
||||
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
if (auto miPredicate = genCmdCast<MI_SET_PREDICATE *>(it)) {
|
||||
// 1. Init section
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(miPredicate, cs.getCpuBase());
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(miPredicate, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::removeTaskSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::tasksListLoopCheckSectionStart;
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++lriCmd);
|
||||
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R6, 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 9) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(++miAluCmd);
|
||||
if (!verifyBbStart<FamilyType>(bbStart, 0, true, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 3. Remove task section
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_1, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(cmds, AluRegisters::R_2, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(cmds, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionStart,
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
if (miMathCmd->DW0.BitField.DwordLength != 13) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 4. List loop check section
|
||||
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++miAluCmd);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
miPredicate++;
|
||||
if (!verifyIncrementOrDecrement<FamilyType>(miPredicate, AluRegisters::R_2, true)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cmds = ptrOffset(miPredicate, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement());
|
||||
|
||||
if (!verifyConditionalRegRegBbStart<FamilyType>(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalRegRegBatchBufferStart()));
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
auto arbCheck = reinterpret_cast<MI_ARB_CHECK *>(++lriCmd);
|
||||
if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
auto cmds2 = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
|
||||
if (!verifyConditionalDataMemBbStart<FamilyType>(cmds2, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::endSectionStart,
|
||||
semaphoreGpuVa, semaphoreValue, CompareOperation::GreaterOrEqual, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bbStart = reinterpret_cast<MI_BATCH_BUFFER_START *>(ptrOffset(cmds2, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart()));
|
||||
if (!verifyBbStart<FamilyType>(bbStart, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart, false, false)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 7. Semaphore section
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++bbStart);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto semaphore = reinterpret_cast<MI_SEMAPHORE_WAIT *>(++miPredicate);
|
||||
if ((semaphore->getSemaphoreGraphicsAddress() != semaphoreGpuVa) ||
|
||||
(semaphore->getSemaphoreDataDword() != semaphoreValue) ||
|
||||
(semaphore->getCompareOperation() != MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// 8. End section
|
||||
|
||||
miPredicate = reinterpret_cast<MI_SET_PREDICATE *>(++semaphore);
|
||||
if (!verifyMiPredicate<FamilyType>(miPredicate, MiPredicateType::Disable)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R5, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return success;
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreateDeferredTasksAllocation) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
@ -931,12 +1528,50 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreate
|
||||
EXPECT_EQ(directSubmission.deferredTasksListAllocation, mockMemoryOperations->gfxAllocationsForMakeResident.back());
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSection) {
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSectionAndInitRegs) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto verifyInitRegisters = [&](LinearStream &cs, size_t offset) {
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(cs, offset);
|
||||
hwParse.findHardwareCommands<FamilyType>();
|
||||
|
||||
bool success = false;
|
||||
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
if (auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it)) {
|
||||
if (CS_GPR_R1 == lriCmd->getRegisterOffset()) {
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R1 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R5, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R5 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return success;
|
||||
};
|
||||
|
||||
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(false, false);
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, 0));
|
||||
|
||||
EXPECT_EQ(0u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_FALSE(directSubmission.relaxedOrderingInitialized);
|
||||
@ -946,12 +1581,16 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize
|
||||
{
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(true, false);
|
||||
EXPECT_TRUE(verifyInitRegisters(directSubmission.ringCommandStream, 0));
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
EXPECT_TRUE(directSubmission.relaxedOrderingInitialized);
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get());
|
||||
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
|
||||
directSubmission.startRingBuffer();
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset));
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
}
|
||||
@ -967,7 +1606,9 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize
|
||||
EXPECT_TRUE(directSubmission.relaxedOrderingInitialized);
|
||||
EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get());
|
||||
|
||||
size_t offset = directSubmission.ringCommandStream.getUsed();
|
||||
directSubmission.startRingBuffer();
|
||||
EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset));
|
||||
EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled);
|
||||
}
|
||||
}
|
||||
@ -989,121 +1630,50 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
|
||||
auto taskStoreSection = ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset);
|
||||
|
||||
if constexpr (FamilyType::isUsingMiSetPredicate) {
|
||||
using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE;
|
||||
using PREDICATE_ENABLE = typename MI_SET_PREDICATE::PREDICATE_ENABLE;
|
||||
EXPECT_TRUE(verifyMiPredicate<FamilyType>(taskStoreSection, MiPredicateType::Disable));
|
||||
|
||||
auto miSetPredicate = reinterpret_cast<MI_SET_PREDICATE *>(taskStoreSection);
|
||||
EXPECT_EQ(PREDICATE_ENABLE::PREDICATE_ENABLE_PREDICATE_DISABLE, miSetPredicate->getPredicateEnable());
|
||||
|
||||
taskStoreSection = ptrOffset(taskStoreSection, sizeof(MI_SET_PREDICATE));
|
||||
taskStoreSection = ptrOffset(taskStoreSection, sizeof(typename FamilyType::MI_SET_PREDICATE));
|
||||
}
|
||||
|
||||
uint64_t deferredTasksVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(taskStoreSection);
|
||||
|
||||
EXPECT_EQ(CS_GPR_R6, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(deferredTasksVa & 0xFFFF'FFFFULL), lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(lriCmd, CS_GPR_R6, static_cast<uint32_t>(deferredTasksVa & 0xFFFF'FFFFULL)));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R6 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(deferredTasksVa >> 32), lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, static_cast<uint32_t>(deferredTasksVa >> 32)));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R7, 0));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, 0));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(8u, lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, 8));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
EXPECT_TRUE(verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, 0));
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
EXPECT_EQ(8u, miMathCmd->DW0.BitField.DwordLength);
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCA), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_1), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCB), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_8), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_SHL), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_STORE), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_8), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_ACCU), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_8, AluRegisters::R_ACCU));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCA), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_8), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_8));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCB), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_6), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_ADD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_STOREIND), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_ACCU), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_7), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7));
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_FENCE_WR), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE));
|
||||
|
||||
// increment
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miAluCmd);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R7);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), 1u);
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
EXPECT_EQ(3u, miMathCmd->DW0.BitField.DwordLength);
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCA), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_1), miAluCmd->DW0.BitField.Operand2);
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_LOAD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_SRCB), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_7), miAluCmd->DW0.BitField.Operand2);
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_ADD), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(0u, miAluCmd->DW0.BitField.Operand2);
|
||||
|
||||
miAluCmd++;
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::OPCODE_STORE), miAluCmd->DW0.BitField.ALUOpcode);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_1), miAluCmd->DW0.BitField.Operand1);
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_ACCU), miAluCmd->DW0.BitField.Operand2);
|
||||
EXPECT_TRUE(verifyIncrementOrDecrement<FamilyType>(++miAluCmd, AluRegisters::R_1, true));
|
||||
}
|
||||
|
||||
HWTEST_F(DirectSubmissionRelaxedOrderingTests, givenNotEnoughSpaceForTaskStoreSectionWhenDispatchingThenSwitchRingBuffers) {
|
||||
@ -1125,4 +1695,32 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, givenNotEnoughSpaceForTaskStoreSe
|
||||
directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
|
||||
|
||||
EXPECT_NE(oldAllocation, directSubmission.ringCommandStream.getGraphicsAllocation());
|
||||
}
|
||||
|
||||
// Checks that the relaxed ordering scheduler section is absent from the ring right after
// initialization and is found (via verifySchedulerProgramming) after each of two consecutive
// command buffer dispatches.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchScheduler, IsAtLeastXeHpcCore) {
    using Dispatcher = RenderDispatcher<FamilyType>;

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);

    directSubmission.initialize(true, false);

    // Ring usage after initialization; passed below as the position at which the
    // first dispatch's commands are expected to begin.
    auto offset = directSubmission.ringCommandStream.getUsed();

    const uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress();
    const uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa;

    // Nothing has been dispatched yet, so verification from the start of the ring must fail.
    EXPECT_FALSE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0));

    FlushStampTracker flushStamp(true);

    // First dispatch: verification is run from the offset captured before the dispatch.
    directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
    EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset));

    // Second dispatch: re-capture the offset so only the newly added commands are checked.
    offset = directSubmission.ringCommandStream.getUsed();
    directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
    EXPECT_TRUE(verifySchedulerProgramming<FamilyType>(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset));
}
|
@ -1161,7 +1161,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnR
|
||||
auto directSubmission = reinterpret_cast<MockSubmission *>(mockCsr->directSubmission.get());
|
||||
EXPECT_TRUE(directSubmission->ringStart);
|
||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection() +
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||
Dispatcher::getSizePreemption() +
|
||||
directSubmission->getSizeDispatch();
|
||||
|
||||
@ -1202,7 +1202,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB
|
||||
auto directSubmission = reinterpret_cast<MockSubmission *>(mockCsr->blitterDirectSubmission.get());
|
||||
EXPECT_TRUE(directSubmission->ringStart);
|
||||
size_t actualDispatchSize = directSubmission->ringCommandStream.getUsed();
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection() +
|
||||
size_t expectedSize = directSubmission->getSizeSemaphoreSection(false) +
|
||||
Dispatcher::getSizePreemption() +
|
||||
directSubmission->getSizeDispatch();
|
||||
|
||||
|
Reference in New Issue
Block a user