mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 08:53:55 +08:00
refactor: rename register offsets and constants
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4fca0e16b6
commit
c48e57e2de
@@ -2432,8 +2432,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
|
||||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
|
||||
auto lri1 = NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), CS_GPR_R0, getLowPart(waitValue), true);
|
||||
auto lri2 = NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), CS_GPR_R0 + 4, getHighPart(waitValue), true);
|
||||
auto lri1 = NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(waitValue), true);
|
||||
auto lri2 = NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(waitValue), true);
|
||||
|
||||
if (inOrderExecInfo->isRegularCmdList()) {
|
||||
addCmdForPatching((implicitDependency ? nullptr : &inOrderExecInfo), lri1, lri2, waitValue, InOrderPatchCommandHelpers::PatchCmdType::Lri64b);
|
||||
@@ -2614,11 +2614,11 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
|
||||
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
|
||||
|
||||
if (maskLsb) {
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, REG_GLOBAL_TIMESTAMP_LDW, mask, globalAddress, workloadPartition);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, mask, contextAddress, workloadPartition);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition);
|
||||
NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition);
|
||||
} else {
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), REG_GLOBAL_TIMESTAMP_LDW, globalAddress, workloadPartition);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextAddress, workloadPartition);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition);
|
||||
}
|
||||
|
||||
adjustWriteKernelTimestamp(globalAddress, contextAddress, maskLsb, mask, workloadPartition);
|
||||
@@ -2875,11 +2875,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
|
||||
|
||||
auto groupCount = ptrOffset(alloc->getGpuAddress(), groupCountOffset);
|
||||
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMX,
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimX,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountX)));
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMY,
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimY,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountY)));
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMZ,
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeMEM(commandContainer, RegisterOffsets::gpgpuDispatchDimZ,
|
||||
ptrOffset(groupCount, offsetof(ze_group_count_t, groupCountZ)));
|
||||
}
|
||||
|
||||
@@ -3377,8 +3377,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
|
||||
if (isQwordInOrderCounter()) {
|
||||
indirectMode = true;
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), CS_GPR_R0, getLowPart(data), true);
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), CS_GPR_R0 + 4, getHighPart(data), true);
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true);
|
||||
NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true);
|
||||
|
||||
} else {
|
||||
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
|
||||
|
||||
@@ -140,8 +140,8 @@ HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExe
|
||||
uint32_t low = sbaAddress & 0xffffffff;
|
||||
uint32_t high = (sbaAddress >> 32) & 0xffffffff;
|
||||
|
||||
expectMMIO<FamilyType>(CS_GPR_R15, low);
|
||||
expectMMIO<FamilyType>(CS_GPR_R15 + 4, high);
|
||||
expectMMIO<FamilyType>(RegisterOffsets::csGprR15, low);
|
||||
expectMMIO<FamilyType>(RegisterOffsets::csGprR15 + 4, high);
|
||||
|
||||
auto instructionHeapBaseAddress = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex,
|
||||
memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex));
|
||||
|
||||
@@ -43,14 +43,14 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
@@ -79,14 +79,14 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
@@ -101,7 +101,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
|
||||
@@ -2850,11 +2850,11 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingBeforeCommandForCo
|
||||
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));
|
||||
EXPECT_NE(cmdList.end(), ++itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
@@ -2885,11 +2885,11 @@ HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingAfterCommandForCop
|
||||
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalOffset));
|
||||
EXPECT_NE(cmdList.end(), ++itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextOffset));
|
||||
}
|
||||
|
||||
|
||||
@@ -384,8 +384,8 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
||||
auto begin = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
true);
|
||||
|
||||
auto barrierOffset = timestampRegisters;
|
||||
@@ -418,8 +418,8 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
||||
begin = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
true);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -882,7 +882,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::globalTimestampLdw, cmd->getSourceRegisterAddress());
|
||||
}
|
||||
itor++;
|
||||
|
||||
@@ -890,7 +890,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, cmd->getSourceRegisterAddress());
|
||||
}
|
||||
itor++;
|
||||
|
||||
@@ -911,7 +911,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(REG_GLOBAL_TIMESTAMP_LDW, cmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::globalTimestampLdw, cmd->getSourceRegisterAddress());
|
||||
}
|
||||
itor++;
|
||||
|
||||
@@ -919,7 +919,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTimestampEventsWhenAppendingKernel
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
{
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, cmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, cmd->getSourceRegisterAddress());
|
||||
}
|
||||
itor++;
|
||||
|
||||
|
||||
@@ -112,7 +112,7 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMX, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimX, regAddress);
|
||||
EXPECT_EQ(expectedXAddress, gpuAddress);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
@@ -122,7 +122,7 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMY, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimY, regAddress);
|
||||
EXPECT_EQ(expectedYAddress, gpuAddress);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
@@ -132,7 +132,7 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMZ, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimZ, regAddress);
|
||||
EXPECT_EQ(expectedZAddress, gpuAddress);
|
||||
|
||||
MI_STORE_REGISTER_MEM *cmd2 = nullptr;
|
||||
@@ -140,11 +140,11 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
do {
|
||||
itor = find<MI_STORE_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
} while (itor != cmdList.end() && cmd2->getRegisterAddress() != GPUGPU_DISPATCHDIMX);
|
||||
} while (itor != cmdList.end() && cmd2->getRegisterAddress() != RegisterOffsets::gpgpuDispatchDimX);
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
auto groupCountStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimX);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupXOffset);
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
@@ -154,7 +154,7 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimY);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupYOffset);
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
@@ -164,14 +164,14 @@ HWTEST_F(CommandListDualStorage, givenIndirectDispatchWithSharedDualStorageMemor
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimZ);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupZOffset);
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress());
|
||||
|
||||
auto workSizeStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
workSizeStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1);
|
||||
workSizeStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR1);
|
||||
|
||||
// Find workgroup size cmds
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(++itor, cmdList.end());
|
||||
@@ -284,7 +284,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMX, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimX, regAddress);
|
||||
EXPECT_EQ(expectedXAddress, gpuAddress);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
@@ -294,7 +294,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMY, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimY, regAddress);
|
||||
EXPECT_EQ(expectedYAddress, gpuAddress);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
@@ -304,7 +304,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
regAddress = cmd->getRegisterAddress();
|
||||
gpuAddress = cmd->getMemoryAddress();
|
||||
|
||||
EXPECT_EQ(GPUGPU_DISPATCHDIMZ, regAddress);
|
||||
EXPECT_EQ(RegisterOffsets::gpgpuDispatchDimZ, regAddress);
|
||||
EXPECT_EQ(expectedZAddress, gpuAddress);
|
||||
|
||||
MI_STORE_REGISTER_MEM *cmd2 = nullptr;
|
||||
@@ -312,11 +312,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
do {
|
||||
itor = find<MI_STORE_REGISTER_MEM *>(++itor, cmdList.end());
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
} while (itor != cmdList.end() && cmd2->getRegisterAddress() != GPUGPU_DISPATCHDIMX);
|
||||
} while (itor != cmdList.end() && cmd2->getRegisterAddress() != RegisterOffsets::gpgpuDispatchDimX);
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
|
||||
auto groupCountStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimX);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupXOffset - sizeof(INLINE_DATA));
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
@@ -326,7 +326,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimY);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupYOffset - sizeof(INLINE_DATA));
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
@@ -336,14 +336,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListDualStorage, givenIndirectDispatchWithSh
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd2 = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ);
|
||||
groupCountStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimZ);
|
||||
groupCountStoreRegisterMemCmd.setMemoryAddress(commandList->getCmdContainer().getIndirectHeap(HeapType::INDIRECT_OBJECT)->getGraphicsAllocation()->getGpuAddress() + numWorkGroupZOffset - sizeof(INLINE_DATA));
|
||||
|
||||
EXPECT_EQ(cmd2->getRegisterAddress(), groupCountStoreRegisterMemCmd.getRegisterAddress());
|
||||
EXPECT_EQ(cmd2->getMemoryAddress(), groupCountStoreRegisterMemCmd.getMemoryAddress());
|
||||
|
||||
auto workSizeStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
workSizeStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1);
|
||||
workSizeStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR1);
|
||||
|
||||
// Find workgroup size cmds
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(++itor, cmdList.end());
|
||||
|
||||
@@ -839,12 +839,12 @@ bool InOrderCmdListTests::verifyInOrderDependency(GenCmdList::iterator &cmd, uin
|
||||
return false;
|
||||
}
|
||||
EXPECT_EQ(getLowPart(counter), lri->getDataDword());
|
||||
EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lri->getRegisterOffset());
|
||||
|
||||
lri++;
|
||||
|
||||
EXPECT_EQ(getHighPart(counter), lri->getDataDword());
|
||||
EXPECT_EQ(CS_GPR_R0 + 4, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0 + 4, lri->getRegisterOffset());
|
||||
|
||||
std::advance(cmd, 2);
|
||||
}
|
||||
@@ -2105,11 +2105,11 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsTh
|
||||
auto lrrCmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_REG *>(*cmdList.begin());
|
||||
ASSERT_NE(nullptr, lrrCmd);
|
||||
|
||||
EXPECT_EQ(CS_GPR_R4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lrrCmd->getDestinationRegisterAddress());
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(CS_GPR_R4 + 4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR4 + 4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingWalkerThenSignalSyncAllocation, IsAtLeastXeHpCore) {
|
||||
@@ -2370,11 +2370,11 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
|
||||
auto lrrCmd = genCmdCast<typename FamilyType::MI_LOAD_REGISTER_REG *>(*cmdList.begin());
|
||||
ASSERT_NE(nullptr, lrrCmd);
|
||||
|
||||
EXPECT_EQ(CS_GPR_R4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lrrCmd->getDestinationRegisterAddress());
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(CS_GPR_R4 + 4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR4 + 4, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
|
||||
lrrCmd++;
|
||||
|
||||
@@ -5451,31 +5451,31 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
|
||||
uint64_t pImplicitArgsGPUVA = heap->getGraphicsAllocation()->getGpuAddress() + getIndirectHeapOffsetForImplicitArgsBuffer<FamilyType>(kernel);
|
||||
|
||||
auto workDimStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
workDimStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R0);
|
||||
workDimStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR0);
|
||||
workDimStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA);
|
||||
|
||||
auto groupCountXStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
groupCountXStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMX);
|
||||
groupCountXStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimX);
|
||||
groupCountXStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountX));
|
||||
|
||||
auto groupCountYStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
groupCountYStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMY);
|
||||
groupCountYStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimY);
|
||||
groupCountYStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountY));
|
||||
|
||||
auto groupCountZStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
groupCountZStoreRegisterMemCmd.setRegisterAddress(GPUGPU_DISPATCHDIMZ);
|
||||
groupCountZStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::gpgpuDispatchDimZ);
|
||||
groupCountZStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, groupCountZ));
|
||||
|
||||
auto globalSizeXStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
globalSizeXStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1);
|
||||
globalSizeXStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR1);
|
||||
globalSizeXStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeX));
|
||||
|
||||
auto globalSizeYStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
globalSizeYStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1);
|
||||
globalSizeYStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR1);
|
||||
globalSizeYStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeY));
|
||||
|
||||
auto globalSizeZStoreRegisterMemCmd = FamilyType::cmdInitStoreRegisterMem;
|
||||
globalSizeZStoreRegisterMemCmd.setRegisterAddress(CS_GPR_R1);
|
||||
globalSizeZStoreRegisterMemCmd.setRegisterAddress(RegisterOffsets::csGprR1);
|
||||
globalSizeZStoreRegisterMemCmd.setMemoryAddress(pImplicitArgsGPUVA + offsetof(ImplicitArgs, globalSizeZ));
|
||||
|
||||
GenCmdList cmdList;
|
||||
|
||||
@@ -697,11 +697,11 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
|
||||
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
itor++;
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
|
||||
itor = find<MI_FLUSH_DW *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
@@ -709,11 +709,11 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyT
|
||||
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
itor++;
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
itor++;
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
}
|
||||
@@ -766,14 +766,14 @@ HWTEST2_F(AppendMemoryCopy,
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
@@ -824,14 +824,14 @@ HWTEST2_F(AppendMemoryCopy,
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
thirdWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
|
||||
@@ -537,8 +537,8 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
auto startCmdList = cmdList.begin();
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
@@ -546,15 +546,15 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
uint64_t contextStartAddressHigh = contextStartAddress + sizeof(uint32_t);
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_UN, globalStartAddressHigh,
|
||||
RegisterOffsets::globalTimestampUn, globalStartAddressHigh,
|
||||
0x23AC, contextStartAddressHigh,
|
||||
true);
|
||||
}
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
true);
|
||||
|
||||
if (UnitTestHelper<FamilyType>::timestampRegisterHighAddress()) {
|
||||
@@ -562,7 +562,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
uint64_t contextEndAddressHigh = contextEndAddress + sizeof(uint32_t);
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
startCmdList,
|
||||
REG_GLOBAL_TIMESTAMP_UN, globalEndAddressHigh,
|
||||
RegisterOffsets::globalTimestampUn, globalEndAddressHigh,
|
||||
0x23AC, contextEndAddressHigh,
|
||||
true);
|
||||
}
|
||||
|
||||
@@ -102,29 +102,29 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
|
||||
auto lrrCmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto eventGpuAddr = event->getCompletionFieldGpuAddress(this->device);
|
||||
|
||||
// conditional bb_start
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(++lrrCmd);
|
||||
EXPECT_EQ(lrmCmd->getRegisterAddress(), CS_GPR_R7);
|
||||
EXPECT_EQ(lrmCmd->getRegisterAddress(), RegisterOffsets::csGprR7);
|
||||
EXPECT_EQ(lrmCmd->getMemoryAddress(), eventGpuAddr);
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R7 + 4);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), RegisterOffsets::csGprR7 + 4);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), 0u);
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R8);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), RegisterOffsets::csGprR8);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), static_cast<uint32_t>(Event::State::STATE_CLEARED));
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R8 + 4);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), RegisterOffsets::csGprR8 + 4);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), 0u);
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
@@ -151,8 +151,8 @@ HWTEST2_F(CommandListAppendWaitOnEvent, givenImmediateCmdListWithDirectSubmissio
|
||||
EXPECT_EQ(static_cast<uint32_t>(AluRegisters::R_ZF), miAluCmd->DW0.BitField.Operand2);
|
||||
|
||||
lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++miAluCmd);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R7);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_PREDICATE_RESULT_2);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR7);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult2);
|
||||
|
||||
auto predicateCmd = reinterpret_cast<MI_SET_PREDICATE *>(++lrrCmd);
|
||||
EXPECT_EQ(static_cast<typename MI_SET_PREDICATE::PREDICATE_ENABLE>(MiPredicateType::NoopOnResult2Clear), predicateCmd->getPredicateEnable());
|
||||
|
||||
@@ -262,23 +262,23 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyR
|
||||
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalStartOffset));
|
||||
itor++;
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextStartOffset));
|
||||
itor++;
|
||||
itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, globalEndOffset));
|
||||
itor++;
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
EXPECT_EQ(cmd->getMemoryAddress(), ptrOffset(baseAddr, contextEndOffset));
|
||||
itor++;
|
||||
EXPECT_EQ(cmdList.end(), itor);
|
||||
@@ -314,7 +314,7 @@ HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToImageCopyBl
|
||||
auto itor = find<MI_STORE_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
}
|
||||
|
||||
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
|
||||
|
||||
@@ -325,14 +325,14 @@ HWTEST2_F(AppendFillTest,
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
@@ -379,14 +379,14 @@ HWTEST2_F(AppendFillTest,
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
begin,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalStartAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextStartAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalStartAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextStartAddress,
|
||||
false);
|
||||
|
||||
validateTimestampRegisters<FamilyType>(cmdList,
|
||||
secondWalker,
|
||||
REG_GLOBAL_TIMESTAMP_LDW, globalEndAddress,
|
||||
GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, contextEndAddress,
|
||||
RegisterOffsets::globalTimestampLdw, globalEndAddress,
|
||||
RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextEndAddress,
|
||||
false);
|
||||
}
|
||||
|
||||
|
||||
@@ -236,7 +236,7 @@ bool validateProgramming(const GenCmdList &cmdList, uint64_t compareData, uint64
|
||||
}
|
||||
|
||||
EXPECT_EQ(getLowPart(compareData), lri->getDataDword());
|
||||
EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lri->getRegisterOffset());
|
||||
|
||||
lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*(++itor));
|
||||
if (!lri) {
|
||||
@@ -244,7 +244,7 @@ bool validateProgramming(const GenCmdList &cmdList, uint64_t compareData, uint64
|
||||
}
|
||||
|
||||
EXPECT_EQ(getHighPart(compareData), lri->getDataDword());
|
||||
EXPECT_EQ(CS_GPR_R0 + 4, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0 + 4, lri->getRegisterOffset());
|
||||
|
||||
itor++;
|
||||
}
|
||||
|
||||
@@ -147,7 +147,7 @@ HWTEST2_F(singleAddressSpaceModeTest, givenImmediateCommandListWhenExecutingWith
|
||||
MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[i]);
|
||||
ASSERT_NE(nullptr, miLoad);
|
||||
|
||||
if (miLoad->getRegisterOffset() == CS_GPR_R15) {
|
||||
if (miLoad->getRegisterOffset() == RegisterOffsets::csGprR15) {
|
||||
gpr15Found = true;
|
||||
break;
|
||||
}
|
||||
@@ -192,7 +192,7 @@ HWTEST2_F(singleAddressSpaceModeTest, givenUseCsrImmediateSubmissionEnabledAndSh
|
||||
MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[i]);
|
||||
ASSERT_NE(nullptr, miLoad);
|
||||
|
||||
if (miLoad->getRegisterOffset() == CS_GPR_R15) {
|
||||
if (miLoad->getRegisterOffset() == RegisterOffsets::csGprR15) {
|
||||
gpr15Found = true;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -446,11 +446,11 @@ HWTEST2_F(L0DebuggerSingleAddressSpace, givenDebuggingEnabledWhenCommandListIsEx
|
||||
MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[i]);
|
||||
ASSERT_NE(nullptr, miLoad);
|
||||
|
||||
if (miLoad->getRegisterOffset() == CS_GPR_R15) {
|
||||
if (miLoad->getRegisterOffset() == RegisterOffsets::csGprR15) {
|
||||
gpr15RegisterCount++;
|
||||
gprMiLoadindex = i;
|
||||
}
|
||||
if (miLoad->getRegisterOffset() == CS_GPR_R15 + 4) {
|
||||
if (miLoad->getRegisterOffset() == RegisterOffsets::csGprR15 + 4) {
|
||||
gpr15RegisterCount++;
|
||||
}
|
||||
}
|
||||
@@ -463,11 +463,11 @@ HWTEST2_F(L0DebuggerSingleAddressSpace, givenDebuggingEnabledWhenCommandListIsEx
|
||||
uint32_t high = (sbaGpuVa >> 32) & 0xffffffff;
|
||||
|
||||
MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[gprMiLoadindex]);
|
||||
EXPECT_EQ(CS_GPR_R15, miLoad->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR15, miLoad->getRegisterOffset());
|
||||
EXPECT_EQ(low, miLoad->getDataDword());
|
||||
|
||||
miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[gprMiLoadindex + 1]);
|
||||
EXPECT_EQ(CS_GPR_R15 + 4, miLoad->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR15 + 4, miLoad->getRegisterOffset());
|
||||
EXPECT_EQ(high, miLoad->getDataDword());
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
|
||||
@@ -101,12 +101,12 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventBeforeComm
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
itor++;
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterCommandThenPipeControlAndStoreRegMemAdded, IsXeHpgCore) {
|
||||
@@ -140,12 +140,12 @@ HWTEST2_F(CommandListCreate, givenNotCopyCommandListWhenProfilingEventAfterComma
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), REG_GLOBAL_TIMESTAMP_LDW);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), RegisterOffsets::globalTimestampLdw);
|
||||
itor++;
|
||||
itor = find<MI_LOAD_REGISTER_REG *>(itor, cmdList.end());
|
||||
EXPECT_NE(cmdList.end(), itor);
|
||||
cmd = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
EXPECT_EQ(cmd->getSourceRegisterAddress(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListCreate, givenCopyCommandListWhenProfilingEventThenStoreRegCommandIsAdded, IsXeHpgCore) {
|
||||
|
||||
@@ -691,7 +691,7 @@ ze_result_t DebugSessionWindows::readSbaBuffer(EuThread::ThreadId threadId, NEO:
|
||||
void DebugSessionWindows::getSbaBufferGpuVa(uint64_t &gpuVa) {
|
||||
KM_ESCAPE_INFO escapeInfo = {};
|
||||
escapeInfo.KmEuDbgL0EscapeInfo.EscapeActionType = DBGUMD_ACTION_READ_MMIO;
|
||||
escapeInfo.KmEuDbgL0EscapeInfo.MmioReadParams.MmioOffset = CS_GPR_R15;
|
||||
escapeInfo.KmEuDbgL0EscapeInfo.MmioReadParams.MmioOffset = RegisterOffsets::csGprR15;
|
||||
escapeInfo.KmEuDbgL0EscapeInfo.MmioReadParams.RegisterOutBufferPtr = reinterpret_cast<uint64_t>(&gpuVa);
|
||||
|
||||
auto status = runEscape(escapeInfo);
|
||||
|
||||
@@ -711,14 +711,14 @@ void CommandQueueHw<GfxFamily>::processDispatchForMarkerWithTimestampPacket(Comm
|
||||
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*currentTimestampPacketNode);
|
||||
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*currentTimestampPacketNode);
|
||||
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false);
|
||||
|
||||
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*currentTimestampPacketNode);
|
||||
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*currentTimestampPacketNode);
|
||||
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(*commandStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -38,23 +38,23 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
||||
auto pCmd = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
||||
MI_LOAD_REGISTER_REG cmdReg = GfxFamily::cmdInitLoadRegisterReg;
|
||||
cmdReg.setSourceRegisterAddress(aluRegister);
|
||||
cmdReg.setDestinationRegisterAddress(CS_GPR_R0);
|
||||
cmdReg.setDestinationRegisterAddress(RegisterOffsets::csGprR0);
|
||||
*pCmd = cmdReg;
|
||||
|
||||
// Load "Mask" into CS_GPR_R1
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
CS_GPR_R1,
|
||||
RegisterOffsets::csGprR1,
|
||||
mask,
|
||||
false);
|
||||
|
||||
// Add instruction MI_MATH with 4 MI_MATH_ALU_INST_INLINE operands
|
||||
auto pCmd3 = reinterpret_cast<uint32_t *>(pCommandStream->getSpace(sizeof(MI_MATH) + NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)));
|
||||
auto pCmd3 = reinterpret_cast<uint32_t *>(pCommandStream->getSpace(sizeof(MI_MATH) + RegisterConstants::numAluInstForReadModifyWrite * sizeof(MI_MATH_ALU_INST_INLINE)));
|
||||
MI_MATH mathCmd;
|
||||
mathCmd.DW0.Value = 0x0;
|
||||
mathCmd.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
||||
mathCmd.DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
|
||||
// 0x3 - 5 Dwords length cmd (-2): 1 for MI_MATH, 4 for MI_MATH_ALU_INST_INLINE
|
||||
mathCmd.DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
|
||||
mathCmd.DW0.BitField.DwordLength = RegisterConstants::numAluInstForReadModifyWrite - 1;
|
||||
*reinterpret_cast<MI_MATH *>(pCmd3) = mathCmd;
|
||||
|
||||
pCmd3++;
|
||||
@@ -104,7 +104,7 @@ void GpgpuWalkerHelper<GfxFamily>::addAluReadModifyWriteRegister(
|
||||
// LOAD value of CS_GPR_R0 into "Register"
|
||||
auto pCmd4 = pCommandStream->getSpaceForCmd<MI_LOAD_REGISTER_REG>();
|
||||
cmdReg = GfxFamily::cmdInitLoadRegisterReg;
|
||||
cmdReg.setSourceRegisterAddress(CS_GPR_R0);
|
||||
cmdReg.setSourceRegisterAddress(RegisterOffsets::csGprR0);
|
||||
cmdReg.setDestinationRegisterAddress(aluRegister);
|
||||
*pCmd4 = cmdReg;
|
||||
|
||||
|
||||
@@ -131,7 +131,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsStart(
|
||||
auto pMICmdLow = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||
MI_STORE_REGISTER_MEM cmd = GfxFamily::cmdInitStoreRegisterMem;
|
||||
adjustMiStoreRegMemMode(&cmd);
|
||||
cmd.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
cmd.setRegisterAddress(RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
cmd.setMemoryAddress(timeStampAddress);
|
||||
*pMICmdLow = cmd;
|
||||
}
|
||||
@@ -164,7 +164,7 @@ void GpgpuWalkerHelper<GfxFamily>::dispatchProfilingCommandsEnd(
|
||||
auto pMICmdLow = commandStream->getSpaceForCmd<MI_STORE_REGISTER_MEM>();
|
||||
MI_STORE_REGISTER_MEM cmd = GfxFamily::cmdInitStoreRegisterMem;
|
||||
adjustMiStoreRegMemMode(&cmd);
|
||||
cmd.setRegisterAddress(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
cmd.setRegisterAddress(RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
cmd.setMemoryAddress(timeStampAddress);
|
||||
*pMICmdLow = cmd;
|
||||
}
|
||||
|
||||
@@ -20,7 +20,7 @@ void GpgpuWalkerHelper<Family>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream
|
||||
if (disablePerfMode) {
|
||||
if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, RegisterOffsets::l3sqcReg4, AluRegisters::OPCODE_OR, RegisterConstants::l3SqcBitLqscR0PerfDis);
|
||||
}
|
||||
} else {
|
||||
if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
@@ -31,7 +31,7 @@ void GpgpuWalkerHelper<Family>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream
|
||||
pipeControl.setCommandStreamerStallEnable(true);
|
||||
*pipeControlSpace = pipeControl;
|
||||
// Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, RegisterOffsets::l3sqcReg4, AluRegisters::OPCODE_AND, ~RegisterConstants::l3SqcBitLqscR0PerfDis);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,7 @@ size_t GpgpuWalkerHelper<Family>::getSizeForWADisableLSQCROPERFforOCL(const Kern
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_MATH) +
|
||||
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) *
|
||||
RegisterConstants::numAluInstForReadModifyWrite * sizeof(MI_MATH_ALU_INST_INLINE)) *
|
||||
2; // For 2 WADisableLSQCROPERFforOCL WAs
|
||||
}
|
||||
return n;
|
||||
|
||||
@@ -19,7 +19,7 @@ void GpgpuWalkerHelper<Family>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream
|
||||
if (disablePerfMode) {
|
||||
if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
// Set bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_OR, L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, RegisterOffsets::l3sqcReg4, AluRegisters::OPCODE_OR, RegisterConstants::l3SqcBitLqscR0PerfDis);
|
||||
}
|
||||
} else {
|
||||
if (kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.usesFencesForReadWriteImages) {
|
||||
@@ -31,7 +31,7 @@ void GpgpuWalkerHelper<Family>::applyWADisableLSQCROPERFforOCL(NEO::LinearStream
|
||||
*pipeControlSpace = pipeControl;
|
||||
|
||||
// Clear bit L3SQC_BIT_LQSC_RO_PERF_DIS in L3SQC_REG4
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, L3SQC_REG4, AluRegisters::OPCODE_AND, ~L3SQC_BIT_LQSC_RO_PERF_DIS);
|
||||
GpgpuWalkerHelper<Family>::addAluReadModifyWriteRegister(pCommandStream, RegisterOffsets::l3sqcReg4, AluRegisters::OPCODE_AND, ~RegisterConstants::l3SqcBitLqscR0PerfDis);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -50,7 +50,7 @@ size_t GpgpuWalkerHelper<Family>::getSizeForWADisableLSQCROPERFforOCL(const Kern
|
||||
sizeof(MI_LOAD_REGISTER_IMM) +
|
||||
sizeof(PIPE_CONTROL) +
|
||||
sizeof(MI_MATH) +
|
||||
NUM_ALU_INST_FOR_READ_MODIFY_WRITE * sizeof(MI_MATH_ALU_INST_INLINE)) *
|
||||
RegisterConstants::numAluInstForReadModifyWrite * sizeof(MI_MATH_ALU_INST_INLINE)) *
|
||||
2; // For 2 WADisableLSQCROPERFforOCL WAs
|
||||
}
|
||||
return n;
|
||||
|
||||
@@ -148,8 +148,8 @@ HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathToSimpleOperationThenS
|
||||
uint32_t valueToAdd = 5u;
|
||||
uint64_t valueAfterMiMathOperation = bufferMemory[0] + valueToAdd;
|
||||
|
||||
loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadValueToRegister<FamilyType>(valueToAdd, CS_GPR_R3);
|
||||
loadAddressToRegisters<FamilyType>(RegisterOffsets::csGprR0, RegisterOffsets::csGprR1, RegisterOffsets::csGprR2, allocation->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadValueToRegister<FamilyType>(valueToAdd, RegisterOffsets::csGprR3);
|
||||
|
||||
auto pCmd = reinterpret_cast<uint32_t *>(taskStream->getSpace(sizeof(MI_MATH)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.Value = 0x0;
|
||||
@@ -236,8 +236,8 @@ HWTEST2_F(MiMath, givenLoadIndirectFromMemoryWhenUseMiMathThenStoreIndirectToAno
|
||||
csr->makeResident(*buffer->getGraphicsAllocation(rootDeviceIndex));
|
||||
csr->makeResident(*bufferB->getGraphicsAllocation(rootDeviceIndex));
|
||||
|
||||
loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadAddressToRegisters<FamilyType>(CS_GPR_R3, CS_GPR_R4, CS_GPR_R2, bufferB->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadAddressToRegisters<FamilyType>(RegisterOffsets::csGprR0, RegisterOffsets::csGprR1, RegisterOffsets::csGprR2, buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadAddressToRegisters<FamilyType>(RegisterOffsets::csGprR3, RegisterOffsets::csGprR4, RegisterOffsets::csGprR2, bufferB->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()); // prepare registers to mi_math operation
|
||||
|
||||
auto pCmd = reinterpret_cast<uint32_t *>(taskStream->getSpace(sizeof(MI_MATH)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.Value = 0x0;
|
||||
@@ -299,9 +299,9 @@ HWTEST2_F(MiMath, givenValueToMakeLeftLogicalShiftWhenUseMiMathThenShiftIsDonePr
|
||||
uint32_t notPowerOfTwoShift = 5u;
|
||||
uint32_t expectedUsedShift = 4u;
|
||||
|
||||
loadValueToRegister<FamilyType>(value, CS_GPR_R0);
|
||||
loadValueToRegister<FamilyType>(shift, CS_GPR_R1);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R2);
|
||||
loadValueToRegister<FamilyType>(value, RegisterOffsets::csGprR0);
|
||||
loadValueToRegister<FamilyType>(shift, RegisterOffsets::csGprR1);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, RegisterOffsets::csGprR2);
|
||||
auto pCmd = reinterpret_cast<uint32_t *>(taskStream->getSpace(sizeof(MI_MATH)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.Value = 0x0;
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
||||
@@ -337,8 +337,8 @@ HWTEST2_F(MiMath, givenValueToMakeLeftLogicalShiftWhenUseMiMathThenShiftIsDonePr
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_2);
|
||||
pAluParam->DW0.BitField.Operand2 = static_cast<uint32_t>(AluRegisters::R_ACCU);
|
||||
|
||||
storeValueInRegisterToMemory<FamilyType>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), CS_GPR_R1);
|
||||
storeValueInRegisterToMemory<FamilyType>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + 4, CS_GPR_R2);
|
||||
storeValueInRegisterToMemory<FamilyType>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress(), RegisterOffsets::csGprR1);
|
||||
storeValueInRegisterToMemory<FamilyType>(buffer->getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + 4, RegisterOffsets::csGprR2);
|
||||
flushStream();
|
||||
|
||||
uint32_t firstShift = value << shift;
|
||||
@@ -370,9 +370,9 @@ HWTEST2_F(MiMath, givenValueToMakeRightLogicalShiftWhenUseMiMathThenShiftIsDoneP
|
||||
uint32_t notPowerOfTwoShift = 5u;
|
||||
uint32_t expectedUsedShift = 4u;
|
||||
|
||||
loadValueToRegister<FamilyType>(value, CS_GPR_R0);
|
||||
loadValueToRegister<FamilyType>(shift, CS_GPR_R1);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R2);
|
||||
loadValueToRegister<FamilyType>(value, RegisterOffsets::csGprR0);
|
||||
loadValueToRegister<FamilyType>(shift, RegisterOffsets::csGprR1);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, RegisterOffsets::csGprR2);
|
||||
auto pCmd = reinterpret_cast<uint32_t *>(taskStream->getSpace(sizeof(MI_MATH)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.Value = 0x0;
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
||||
@@ -408,8 +408,8 @@ HWTEST2_F(MiMath, givenValueToMakeRightLogicalShiftWhenUseMiMathThenShiftIsDoneP
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_2);
|
||||
pAluParam->DW0.BitField.Operand2 = static_cast<uint32_t>(AluRegisters::R_ACCU);
|
||||
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), CS_GPR_R1);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, CS_GPR_R2);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), RegisterOffsets::csGprR1);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, RegisterOffsets::csGprR2);
|
||||
flushStream();
|
||||
|
||||
uint32_t firstShift = value >> shift;
|
||||
@@ -441,9 +441,9 @@ HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDon
|
||||
uint32_t notPowerOfTwoShift = 5u;
|
||||
uint32_t expectedUsedShift = 4u;
|
||||
|
||||
loadAddressToRegisters<FamilyType>(CS_GPR_R0, CS_GPR_R1, CS_GPR_R2, allocation->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadValueToRegister<FamilyType>(shift, CS_GPR_R4);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, CS_GPR_R5);
|
||||
loadAddressToRegisters<FamilyType>(RegisterOffsets::csGprR0, RegisterOffsets::csGprR1, RegisterOffsets::csGprR2, allocation->getGpuAddress()); // prepare registers to mi_math operation
|
||||
loadValueToRegister<FamilyType>(shift, RegisterOffsets::csGprR4);
|
||||
loadValueToRegister<FamilyType>(notPowerOfTwoShift, RegisterOffsets::csGprR5);
|
||||
|
||||
auto pCmd = reinterpret_cast<uint32_t *>(taskStream->getSpace(sizeof(MI_MATH)));
|
||||
reinterpret_cast<MI_MATH *>(pCmd)->DW0.Value = 0x0;
|
||||
@@ -488,8 +488,8 @@ HWTEST2_F(MiMath, givenValueToMakeRightAritmeticShiftWhenUseMiMathThenShiftIsDon
|
||||
pAluParam->DW0.BitField.Operand1 = static_cast<uint32_t>(AluRegisters::R_5);
|
||||
pAluParam->DW0.BitField.Operand2 = static_cast<uint32_t>(AluRegisters::R_ACCU);
|
||||
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), CS_GPR_R4);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, CS_GPR_R5);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress(), RegisterOffsets::csGprR4);
|
||||
storeValueInRegisterToMemory<FamilyType>(allocation->getGpuAddress() + 4, RegisterOffsets::csGprR5);
|
||||
flushStream();
|
||||
|
||||
int64_t firstShift = bufferMemory[0];
|
||||
|
||||
@@ -1161,12 +1161,12 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenEnqueueKernelWhenProgrammingDe
|
||||
EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(mockCmdQueueHw.getCS(0).getCpuBase(), cmdsOffset));
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto eventNode = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
@@ -1259,12 +1259,12 @@ HWTEST2_F(RelaxedOrderingEnqueueKernelTests, givenBarrierWithDependenciesWhenFlu
|
||||
EXPECT_TRUE(ultCsr.recordedDispatchFlags.hasRelaxedOrderingDependencies);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(cmdStream.getCpuBase(), cmdsOffset));
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto eventNode = castToObject<Event>(outEvent)->getTimestampPacketNodes()->peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
@@ -1086,12 +1086,12 @@ HWTEST2_F(RelaxedOrderingBcsTests, givenDependenciesWhenFlushingThenProgramCorre
|
||||
EXPECT_TRUE(csr.latestFlushedBatchBuffer.hasRelaxedOrderingDependencies);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(csr.commandStream.getCpuBase(), cmdsOffset));
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR4 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto eventNode = timestamp.peekNodes()[0];
|
||||
auto compareAddress = eventNode->getGpuAddress() + eventNode->getContextEndOffset();
|
||||
|
||||
@@ -251,13 +251,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueWithProflingWhenWal
|
||||
auto pBeforeMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorBeforeMI);
|
||||
pBeforeMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorBeforeMI);
|
||||
ASSERT_NE(nullptr, pBeforeMI);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pBeforeMI->getRegisterAddress());
|
||||
|
||||
auto itorAfterMI = find<MI_STORE_REGISTER_MEM *>(itorGPGPUWalkerCmd, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorAfterMI);
|
||||
auto pAfterMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorAfterMI);
|
||||
ASSERT_NE(nullptr, pAfterMI);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pAfterMI->getRegisterAddress());
|
||||
++itorAfterMI;
|
||||
pAfterMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorAfterMI);
|
||||
EXPECT_EQ(nullptr, pAfterMI);
|
||||
@@ -370,13 +370,13 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingTests, GivenCommandQueueBlockedWithProfilin
|
||||
auto pBeforeMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorBeforeMI);
|
||||
pBeforeMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorBeforeMI);
|
||||
ASSERT_NE(nullptr, pBeforeMI);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pBeforeMI->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pBeforeMI->getRegisterAddress());
|
||||
|
||||
auto itorAfterMI = find<MI_STORE_REGISTER_MEM *>(itorGPGPUWalkerCmd, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itorAfterMI);
|
||||
auto pAfterMI = genCmdCast<MI_STORE_REGISTER_MEM *>(*itorAfterMI);
|
||||
ASSERT_NE(nullptr, pAfterMI);
|
||||
EXPECT_EQ(GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, pAfterMI->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::gpThreadTimeRegAddressOffsetLow, pAfterMI->getRegisterAddress());
|
||||
++itorAfterMI;
|
||||
EXPECT_EQ(itorAfterMI, cmdList.end());
|
||||
clReleaseEvent(event);
|
||||
@@ -1050,10 +1050,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingWithPerfCountersTests, GivenCommandQueueWit
|
||||
auto &cmdList = parse.cmdList;
|
||||
parse.parseCommands<FamilyType>(*pCmdQ);
|
||||
|
||||
auto itor = expectStoreRegister<FamilyType>(cmdList, cmdList.begin(), timeStampGpuAddress + offsetof(HwTimeStamps, contextStartTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
auto itor = expectStoreRegister<FamilyType>(cmdList, cmdList.begin(), timeStampGpuAddress + offsetof(HwTimeStamps, contextStartTS), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
// after WALKER:
|
||||
|
||||
itor = expectStoreRegister<FamilyType>(cmdList, itor, timeStampGpuAddress + offsetof(HwTimeStamps, contextEndTS), GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW);
|
||||
itor = expectStoreRegister<FamilyType>(cmdList, itor, timeStampGpuAddress + offsetof(HwTimeStamps, contextEndTS), RegisterOffsets::gpThreadTimeRegAddressOffsetLow);
|
||||
|
||||
EXPECT_TRUE(pEvent->calcProfilingData());
|
||||
|
||||
|
||||
@@ -189,7 +189,7 @@ template <typename GfxFamily>
|
||||
struct EncodeMath {
|
||||
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
|
||||
using MI_MATH = typename GfxFamily::MI_MATH;
|
||||
constexpr static size_t streamCommandSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
constexpr static size_t streamCommandSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;
|
||||
|
||||
static uint32_t *commandReserve(CommandContainer &container);
|
||||
static uint32_t *commandReserve(LinearStream &cmdStream);
|
||||
|
||||
@@ -105,22 +105,22 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
|
||||
logLws++;
|
||||
}
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R0, offset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R1, 0, true);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, offset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, 0, true);
|
||||
|
||||
i = 0;
|
||||
while (i < logLws) {
|
||||
if (val & (1 << i)) {
|
||||
EncodeMath<Family>::addition(container, AluRegisters::R_1,
|
||||
AluRegisters::R_0, AluRegisters::R_2);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R1, CS_GPR_R2);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR1, RegisterOffsets::csGprR2);
|
||||
}
|
||||
EncodeMath<Family>::addition(container, AluRegisters::R_0,
|
||||
AluRegisters::R_0, AluRegisters::R_2);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R0, CS_GPR_R2);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR0, RegisterOffsets::csGprR2);
|
||||
i++;
|
||||
}
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), CS_GPR_R1, dstAddress, false);
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::csGprR1, dstAddress, false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -133,14 +133,14 @@ void EncodeMathMMIO<Family>::encodeMulRegVal(CommandContainer &container, uint32
|
||||
*/
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &container, uint64_t firstOperand, uint32_t secondOperand) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(container, CS_GPR_R0, firstOperand);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R1, secondOperand, true);
|
||||
EncodeSetMMIO<Family>::encodeMEM(container, RegisterOffsets::csGprR0, firstOperand);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR1, secondOperand, true);
|
||||
|
||||
/* CS_GPR_R* registers map to AluRegisters::R_* registers */
|
||||
/* RegisterOffsets::csGprR* registers map to AluRegisters::R_* registers */
|
||||
EncodeMath<Family>::greaterThan(container, AluRegisters::R_0,
|
||||
AluRegisters::R_1, AluRegisters::R_2);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(container, CS_PREDICATE_RESULT, CS_GPR_R2);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csPredicateResult, RegisterOffsets::csGprR2);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -150,13 +150,13 @@ void EncodeMathMMIO<Family>::encodeGreaterThanPredicate(CommandContainer &contai
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeBitwiseAndVal(CommandContainer &container, uint32_t regOffset, uint32_t immVal, uint64_t dstAddress,
|
||||
bool workloadPartition) {
|
||||
EncodeSetMMIO<Family>::encodeREG(container, CS_GPR_R13, regOffset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, CS_GPR_R14, immVal, true);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, RegisterOffsets::csGprR13, regOffset);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, RegisterOffsets::csGprR14, immVal, true);
|
||||
EncodeMath<Family>::bitwiseAnd(container, AluRegisters::R_13,
|
||||
AluRegisters::R_14,
|
||||
AluRegisters::R_15);
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(),
|
||||
CS_GPR_R15, dstAddress, workloadPartition);
|
||||
RegisterOffsets::csGprR15, dstAddress, workloadPartition);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -211,14 +211,14 @@ uint32_t *EncodeMath<Family>::commandReserve(CommandContainer &container) {
|
||||
|
||||
template <typename Family>
|
||||
uint32_t *EncodeMath<Family>::commandReserve(LinearStream &cmdStream) {
|
||||
size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;
|
||||
|
||||
auto cmd = reinterpret_cast<uint32_t *>(cmdStream.getSpace(size));
|
||||
MI_MATH mathBuffer;
|
||||
mathBuffer.DW0.Value = 0x0;
|
||||
mathBuffer.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
|
||||
mathBuffer.DW0.BitField.InstructionOpcode = MI_MATH::MI_COMMAND_OPCODE_MI_MATH;
|
||||
mathBuffer.DW0.BitField.DwordLength = NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1;
|
||||
mathBuffer.DW0.BitField.DwordLength = RegisterConstants::numAluInstForReadModifyWrite - 1;
|
||||
*reinterpret_cast<MI_MATH *>(cmd) = mathBuffer;
|
||||
cmd++;
|
||||
|
||||
@@ -249,8 +249,8 @@ void EncodeMathMMIO<Family>::encodeAluAnd(MI_MATH_ALU_INST_INLINE *pAluParam,
|
||||
|
||||
template <typename Family>
|
||||
void EncodeMathMMIO<Family>::encodeIncrementOrDecrement(LinearStream &cmdStream, AluRegisters operandRegister, IncrementOrDecrementOperation operationType) {
|
||||
LriHelper<Family>::program(&cmdStream, CS_GPR_R7, 1, true);
|
||||
LriHelper<Family>::program(&cmdStream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7, 1, true);
|
||||
LriHelper<Family>::program(&cmdStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
|
||||
EncodeAluHelper<Family, 4> aluHelper;
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, operandRegister);
|
||||
@@ -605,7 +605,7 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), GPUGPU_DISPATCHDIM[i], ptrOffset(crossThreadAddress, offsets[i]), false);
|
||||
EncodeStoreMMIO<Family>::encode(*container.getCommandStream(), RegisterOffsets::gpgpuDispatchDim[i], ptrOffset(crossThreadAddress, offsets[i]), false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -613,7 +613,7 @@ template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset workDimOffset, uint64_t crossThreadAddress, const uint32_t *groupSize) {
|
||||
if (NEO::isValidOffset(workDimOffset)) {
|
||||
auto dstPtr = ptrOffset(crossThreadAddress, workDimOffset);
|
||||
constexpr uint32_t resultRegister = CS_GPR_R0;
|
||||
constexpr uint32_t resultRegister = RegisterOffsets::csGprR0;
|
||||
constexpr AluRegisters resultAluRegister = AluRegisters::R_0;
|
||||
const uint32_t offset = static_cast<uint32_t>((1ull << 8 * (dstPtr & 0b11)) - 1);
|
||||
const uint32_t memoryMask = std::numeric_limits<uint32_t>::max() - static_cast<uint32_t>((1ull << 8 * ((dstPtr & 0b11) + 1)) - 1) + offset;
|
||||
@@ -628,13 +628,13 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, resultRegister, 3 << (8 * (dstPtr & 0b11)), true);
|
||||
} else {
|
||||
|
||||
constexpr uint32_t groupCount2Register = CS_GPR_R1;
|
||||
constexpr uint32_t groupCount2Register = RegisterOffsets::csGprR1;
|
||||
constexpr AluRegisters groupCount2AluRegister = AluRegisters::R_1;
|
||||
|
||||
constexpr uint32_t groupSize1Register = CS_GPR_R0;
|
||||
constexpr uint32_t groupSize1Register = RegisterOffsets::csGprR0;
|
||||
constexpr AluRegisters groupSize1AluRegister = AluRegisters::R_0;
|
||||
|
||||
constexpr uint32_t groupCount1Register = CS_GPR_R1;
|
||||
constexpr uint32_t groupCount1Register = RegisterOffsets::csGprR1;
|
||||
constexpr AluRegisters groupCount1AluRegister = AluRegisters::R_1;
|
||||
|
||||
constexpr AluRegisters sumAluRegister = AluRegisters::R_0;
|
||||
@@ -643,18 +643,18 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
|
||||
|
||||
constexpr AluRegisters workDimGe2AluRegister = AluRegisters::R_4;
|
||||
|
||||
constexpr uint32_t constantOneRegister = CS_GPR_R5;
|
||||
constexpr uint32_t constantOneRegister = RegisterOffsets::csGprR5;
|
||||
constexpr AluRegisters constantOneAluRegister = AluRegisters::R_5;
|
||||
constexpr uint32_t constantTwoRegister = CS_GPR_R6;
|
||||
constexpr uint32_t constantTwoRegister = RegisterOffsets::csGprR6;
|
||||
constexpr AluRegisters constantTwoAluRegister = AluRegisters::R_6;
|
||||
|
||||
constexpr uint32_t backupRegister = CS_GPR_R7;
|
||||
constexpr uint32_t backupRegister = RegisterOffsets::csGprR7;
|
||||
constexpr AluRegisters backupAluRegister = AluRegisters::R_7;
|
||||
|
||||
constexpr uint32_t memoryMaskRegister = CS_GPR_R8;
|
||||
constexpr uint32_t memoryMaskRegister = RegisterOffsets::csGprR8;
|
||||
constexpr AluRegisters memoryMaskAluRegister = AluRegisters::R_8;
|
||||
|
||||
constexpr uint32_t offsetRegister = CS_GPR_R8;
|
||||
constexpr uint32_t offsetRegister = RegisterOffsets::csGprR8;
|
||||
constexpr AluRegisters offsetAluRegister = AluRegisters::R_8;
|
||||
|
||||
if (offset) {
|
||||
@@ -667,13 +667,13 @@ void EncodeIndirectParams<Family>::setWorkDimIndirect(CommandContainer &containe
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantOneRegister, 1, true);
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, constantTwoRegister, 2, true);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount2Register, GPUGPU_DISPATCHDIM[2]);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount2Register, RegisterOffsets::gpgpuDispatchDim[2]);
|
||||
|
||||
EncodeMath<Family>::greaterThan(container, groupCount2AluRegister, constantOneAluRegister, workDimEq3AluRegister);
|
||||
EncodeMath<Family>::bitwiseAnd(container, workDimEq3AluRegister, constantOneAluRegister, workDimEq3AluRegister);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeIMM(container, groupSize1Register, groupSize[1], true);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount1Register, GPUGPU_DISPATCHDIM[1]);
|
||||
EncodeSetMMIO<Family>::encodeREG(container, groupCount1Register, RegisterOffsets::gpgpuDispatchDim[1]);
|
||||
|
||||
EncodeMath<Family>::addition(container, groupSize1AluRegister, groupCount1AluRegister, sumAluRegister);
|
||||
EncodeMath<Family>::addition(container, sumAluRegister, workDimEq3AluRegister, sumAluRegister);
|
||||
@@ -774,13 +774,13 @@ void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &c
|
||||
if (NEO::isUndefinedOffset(offsets[i])) {
|
||||
continue;
|
||||
}
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, GPUGPU_DISPATCHDIM[i], lws[i], ptrOffset(crossThreadAddress, offsets[i]));
|
||||
EncodeMathMMIO<Family>::encodeMulRegVal(container, RegisterOffsets::gpgpuDispatchDim[i], lws[i], ptrOffset(crossThreadAddress, offsets[i]));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline size_t EncodeIndirectParams<Family>::getCmdsSizeForSetWorkDimIndirect(const uint32_t *groupSize, bool misaligedPtr) {
|
||||
constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;
|
||||
auto requiredSize = sizeof(MI_STORE_REGISTER_MEM) + sizeof(MI_LOAD_REGISTER_IMM);
|
||||
UNRECOVERABLE_IF(!groupSize);
|
||||
if (groupSize[2] < 2) {
|
||||
@@ -863,19 +863,19 @@ void EncodeAtomic<Family>::programMiAtomic(LinearStream &commandStream,
|
||||
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress,
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7, compareAddress);
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress);
|
||||
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7 + 4, compareAddress + 4);
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7 + 4, compareAddress + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
}
|
||||
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
|
||||
}
|
||||
@@ -883,18 +883,18 @@ void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataMemBatchBufferSt
|
||||
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg,
|
||||
uint64_t compareData, CompareOperation compareOperation, bool indirect, bool useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7, compareReg);
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7, compareReg);
|
||||
if (useQwordData) {
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R7 + 4, compareReg + 4);
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR7 + 4, compareReg + 4);
|
||||
} else {
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
}
|
||||
|
||||
uint32_t compareDataLow = static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max());
|
||||
uint32_t compareDataHigh = useQwordData ? static_cast<uint32_t>(compareData >> 32) : 0;
|
||||
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, compareDataHigh, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8, compareDataLow, true);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, compareDataHigh, true);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
|
||||
}
|
||||
@@ -909,11 +909,11 @@ void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegRegBatchBufferSta
|
||||
template <typename Family>
|
||||
void EncodeBatchBufferStartOrEnd<Family>::programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg,
|
||||
CompareOperation compareOperation, bool indirect) {
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, CS_GPR_R7, compareAddress);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R7 + 4, 0, true);
|
||||
EncodeSetMMIO<Family>::encodeMEM(commandStream, RegisterOffsets::csGprR7, compareAddress);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_GPR_R8, compareReg);
|
||||
LriHelper<Family>::program(&commandStream, CS_GPR_R8 + 4, 0, true);
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csGprR8, compareReg);
|
||||
LriHelper<Family>::program(&commandStream, RegisterOffsets::csGprR8 + 4, 0, true);
|
||||
|
||||
programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect);
|
||||
}
|
||||
@@ -936,7 +936,7 @@ void EncodeBatchBufferStartOrEnd<Family>::programConditionalBatchBufferStartBase
|
||||
|
||||
aluHelper.copyToCmdStream(commandStream);
|
||||
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, CS_PREDICATE_RESULT_2, CS_GPR_R7);
|
||||
EncodeSetMMIO<Family>::encodeREG(commandStream, RegisterOffsets::csPredicateResult2, RegisterOffsets::csGprR7);
|
||||
|
||||
MiPredicateType predicateType = MiPredicateType::NoopOnResult2Clear; // Equal or Less
|
||||
if ((compareOperation == CompareOperation::NotEqual) || (compareOperation == CompareOperation::GreaterOrEqual)) {
|
||||
|
||||
@@ -123,8 +123,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::programEndingCmd(LinearStream &c
|
||||
|
||||
bool indirect = false;
|
||||
if (relaxedOrderingEnabled && hasRelaxedOrderingDependencies) {
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0, CS_GPR_R3);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, CS_GPR_R0 + 4, CS_GPR_R3 + 4);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR3);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeREG(commandStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR3 + 4);
|
||||
|
||||
indirect = true;
|
||||
}
|
||||
|
||||
@@ -123,12 +123,12 @@ void DebuggerL0Hw<GfxFamily>::programSbaAddressLoad(NEO::LinearStream &cmdStream
|
||||
uint32_t high = (sbaGpuVa >> 32) & 0xffffffff;
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(&cmdStream,
|
||||
CS_GPR_R15,
|
||||
RegisterOffsets::csGprR15,
|
||||
low,
|
||||
true);
|
||||
|
||||
NEO::LriHelper<GfxFamily>::program(&cmdStream,
|
||||
CS_GPR_R15 + 4,
|
||||
RegisterOffsets::csGprR15 + 4,
|
||||
high,
|
||||
true);
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ template <typename GfxFamily>
|
||||
size_t DebuggerL0Hw<GfxFamily>::getSbaTrackingCommandsSize(size_t trackedAddressCount) {
|
||||
if (singleAddressSpaceSbaTracking) {
|
||||
NEO::EncodeDummyBlitWaArgs waArgs{false};
|
||||
constexpr uint32_t aluCmdSize = sizeof(typename GfxFamily::MI_MATH) + sizeof(typename GfxFamily::MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
|
||||
constexpr uint32_t aluCmdSize = sizeof(typename GfxFamily::MI_MATH) + sizeof(typename GfxFamily::MI_MATH_ALU_INST_INLINE) * RegisterConstants::numAluInstForReadModifyWrite;
|
||||
return 2 * (EncodeMiArbCheck<GfxFamily>::getCommandSizeWithWa(waArgs) + sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)) +
|
||||
trackedAddressCount * (sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM) + aluCmdSize + 2 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM) +
|
||||
3 * sizeof(typename GfxFamily::MI_STORE_DATA_IMM) +
|
||||
@@ -77,7 +77,7 @@ void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommandsSingleAddressSpace(NEO::
|
||||
for (const auto &pair : fieldOffsetAndValue) {
|
||||
|
||||
// Store SBA field offset to R0
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(cmdStream, CS_GPR_R0, static_cast<uint32_t>(pair.first), true);
|
||||
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(cmdStream, RegisterOffsets::csGprR0, static_cast<uint32_t>(pair.first), true);
|
||||
// Add GPR0 to GPR15, store result in GPR1
|
||||
NEO::EncodeMath<GfxFamily>::addition(cmdStream, AluRegisters::R_0, AluRegisters::R_15, AluRegisters::R_1);
|
||||
|
||||
@@ -104,8 +104,8 @@ void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommandsSingleAddressSpace(NEO::
|
||||
const auto gmmHelper = device->getGmmHelper();
|
||||
const auto gpuVaOfDataDWORD1 = gmmHelper->decanonize(gpuVaOfData + 4);
|
||||
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(miStoreRegMemLow, CS_GPR_R1, gpuVaOfAddress, false);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(miStoreRegMemHigh, CS_GPR_R1 + 4, gpuVaOfAddress + 4, false);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(miStoreRegMemLow, RegisterOffsets::csGprR1, gpuVaOfAddress, false);
|
||||
NEO::EncodeStoreMMIO<GfxFamily>::encode(miStoreRegMemHigh, RegisterOffsets::csGprR1 + 4, gpuVaOfAddress + 4, false);
|
||||
|
||||
MI_STORE_DATA_IMM setSbaBufferAddress = GfxFamily::cmdInitStoreDataImm;
|
||||
gpuVaOfData = gmmHelper->decanonize(gpuVaOfData);
|
||||
|
||||
@@ -128,21 +128,21 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, true, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true);
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::removeTaskSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(removeTaskVa >> 32), true);
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::tasksListLoopCheckSectionStart;
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32), true);
|
||||
}
|
||||
|
||||
// 2. Dispatch task section (loop start)
|
||||
@@ -151,11 +151,11 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R6 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR6 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 10> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
@@ -184,16 +184,16 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::R_1);
|
||||
EncodeMathMMIO<GfxFamily>::encodeDecrement(schedulerCmdStream, AluRegisters::R_2);
|
||||
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(schedulerCmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true, false);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, true, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7, 8, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 14> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
@@ -228,8 +228,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
loopSectionStartAddress,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR2 + 4, 0, true);
|
||||
}
|
||||
|
||||
// 5. Drain request section
|
||||
@@ -250,20 +250,20 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false, false);
|
||||
RegisterOffsets::csGprR1, currentRelaxedOrderingQueueSize, CompareOperation::GreaterOrEqual, false, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false, false);
|
||||
RegisterOffsets::csGprR5, 1, CompareOperation::Equal, false, false);
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
{
|
||||
UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::schedulerLoopCheckSectionStart);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionSize), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR10 + 4, 0, true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 4> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
@@ -273,7 +273,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU);
|
||||
aluHelper.copyToCmdStream(schedulerCmdStream);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, CS_GPR_R11, CompareOperation::GreaterOrEqual, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, RegisterOffsets::csGprR11, CompareOperation::GreaterOrEqual, true);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerCmdStream, schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
false, false, false);
|
||||
@@ -292,9 +292,9 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
||||
|
||||
uint64_t semaphoreSectionVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::semaphoreSectionStart;
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R11, value, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR11, value, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, RegisterOffsets::csGprR9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32), true);
|
||||
|
||||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||
|
||||
@@ -795,12 +795,12 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingQueueStal
|
||||
LinearStream bbStartStream(ringCommandStream.getSpace(EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false)),
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 1, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 1, true);
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
// patch conditional bb_start with current GPU address
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(bbStartStream, ringCommandStream.getCurrentGpuAddressPosition(),
|
||||
CS_GPR_R1, 0, CompareOperation::Equal, false, false);
|
||||
RegisterOffsets::csGprR1, 0, CompareOperation::Equal, false, false);
|
||||
|
||||
relaxedOrderingSchedulerRequired = false;
|
||||
}
|
||||
@@ -813,23 +813,23 @@ size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatchRelaxedOrdering
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingReturnPtrRegs(LinearStream &cmdStream, uint64_t returnPtr) {
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32), true);
|
||||
|
||||
uint64_t returnPtrAfterTaskStoreSection = returnPtr;
|
||||
|
||||
returnPtrAfterTaskStoreSection += RelaxedOrderingHelper::getSizeTaskStoreSection<GfxFamily>();
|
||||
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtrAfterTaskStoreSection & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&cmdStream, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtrAfterTaskStoreSection >> 32), true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DirectSubmissionHw<GfxFamily, Dispatcher>::initRelaxedOrderingRegisters() {
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R1, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R1 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, CS_GPR_R5 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR1 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&ringCommandStream, RegisterOffsets::csGprR5 + 4, 0, true);
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
@@ -842,16 +842,16 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
||||
EncodeMiPredicate<GfxFamily>::encode(stream, MiPredicateType::Disable);
|
||||
|
||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R6, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R6 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6, static_cast<uint32_t>(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR6 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
// Task start VA
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R7, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R7 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR7 + 4, 0, true);
|
||||
|
||||
// Shift by 8 = multiply by 256. Address must by 64b aligned (shift by 6), but SHL accepts only 1, 2, 4, 8, 16 and 32
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R8, 8, true);
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R8 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8, 8, true);
|
||||
LriHelper<GfxFamily>::program(&stream, RegisterOffsets::csGprR8 + 4, 0, true);
|
||||
|
||||
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
|
||||
@@ -880,9 +880,9 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
||||
uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress();
|
||||
|
||||
// 1. Init section
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R11, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R9, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R9 + 4, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR11, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR9 + 4, 0, true);
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false);
|
||||
|
||||
// 2. Semaphore section
|
||||
@@ -898,7 +898,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerStream, MiPredicateType::Disable);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, CS_GPR_R5, 0, true);
|
||||
LriHelper<GfxFamily>::program(&schedulerStream, RegisterOffsets::csGprR5, 0, true);
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<GfxFamily>::totalSize);
|
||||
|
||||
@@ -22,8 +22,8 @@ static constexpr uint32_t maxQueueSize = 16;
|
||||
template <typename GfxFamily>
|
||||
void encodeRegistersBeforeDependencyCheckers(LinearStream &cmdStream) {
|
||||
// Indirect BB_START operates only on GPR_0
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(cmdStream, CS_GPR_R0, CS_GPR_R4);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(cmdStream, CS_GPR_R0 + 4, CS_GPR_R4 + 4);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(cmdStream, RegisterOffsets::csGprR0, RegisterOffsets::csGprR4);
|
||||
EncodeSetMMIO<GfxFamily>::encodeREG(cmdStream, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR4 + 4);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -92,8 +92,8 @@ void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pComm
|
||||
preemptionMode == PreemptionMode::MidThread) {
|
||||
if (device.getHardwareInfo().workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption) {
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
CS_GPR_R0,
|
||||
GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueBeforeWalker,
|
||||
false);
|
||||
}
|
||||
}
|
||||
@@ -107,8 +107,8 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pComman
|
||||
preemptionMode == PreemptionMode::MidThread) {
|
||||
if (device.getHardwareInfo().workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption) {
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
CS_GPR_R0,
|
||||
GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueAfterWalker,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -41,8 +41,8 @@ void PreemptionHelper::applyPreemptionWaCmdsBegin<GfxFamily>(LinearStream *pComm
|
||||
preemptionMode == PreemptionMode::MidThread) {
|
||||
if (device.getHardwareInfo().workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption) {
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
CS_GPR_R0,
|
||||
GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueBeforeWalker,
|
||||
false);
|
||||
}
|
||||
}
|
||||
@@ -56,8 +56,8 @@ void PreemptionHelper::applyPreemptionWaCmdsEnd<GfxFamily>(LinearStream *pComman
|
||||
preemptionMode == PreemptionMode::MidThread) {
|
||||
if (device.getHardwareInfo().workaroundTable.flags.waModifyVFEStateAfterGPGPUPreemption) {
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
CS_GPR_R0,
|
||||
GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER,
|
||||
RegisterOffsets::csGprR0,
|
||||
RegisterConstants::gpgpuWalkerCookieValueAfterWalker,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -522,8 +522,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingStartMmios(LinearStream &cmdS
|
||||
auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(timestampPacketNode);
|
||||
auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(timestampPacketNode);
|
||||
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextStartGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextStartGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalStartAddress, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
@@ -531,8 +531,8 @@ void BlitCommandsHelper<GfxFamily>::encodeProfilingEndMmios(LinearStream &cmdStr
|
||||
auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(timestampPacketNode);
|
||||
auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(timestampPacketNode);
|
||||
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timestampContextEndGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalEndAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, timestampContextEndGpuAddress, false);
|
||||
EncodeStoreMMIO<GfxFamily>::encode(cmdStream, RegisterOffsets::globalTimestampLdw, timestampGlobalEndAddress, false);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -34,7 +34,7 @@ void PreambleHelper<GfxFamily>::programSemaphoreDelay(LinearStream *pCommandStre
|
||||
if (debugManager.flags.ForceSemaphoreDelayBetweenWaits.get() > -1) {
|
||||
uint32_t valueOfNewSemaphoreDelay = debugManager.flags.ForceSemaphoreDelayBetweenWaits.get();
|
||||
LriHelper<GfxFamily>::program(pCommandStream,
|
||||
SEMA_WAIT_POLL,
|
||||
RegisterOffsets::semaWaitPoll,
|
||||
valueOfNewSemaphoreDelay,
|
||||
true);
|
||||
};
|
||||
|
||||
@@ -8,43 +8,52 @@
|
||||
#pragma once
|
||||
#include <stdint.h>
|
||||
|
||||
inline constexpr uint32_t L3SQC_BIT_LQSC_RO_PERF_DIS = 0x08000000;
|
||||
inline constexpr uint32_t L3SQC_REG4 = 0xB118;
|
||||
namespace RegisterConstants {
|
||||
inline constexpr uint32_t l3SqcBitLqscR0PerfDis = 0x08000000;
|
||||
|
||||
inline constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_BEFORE_WALKER = 0xFFFFFFFF;
|
||||
inline constexpr uint32_t GPGPU_WALKER_COOKIE_VALUE_AFTER_WALKER = 0x00000000;
|
||||
inline constexpr uint32_t gpgpuWalkerCookieValueBeforeWalker = 0xFFFFFFFF;
|
||||
inline constexpr uint32_t gpgpuWalkerCookieValueAfterWalker = 0x00000000;
|
||||
inline constexpr uint32_t numAluInstForReadModifyWrite = 4;
|
||||
} // namespace RegisterConstants
|
||||
namespace RegisterOffsets {
|
||||
inline constexpr uint32_t l3sqcReg4 = 0xB118;
|
||||
|
||||
// Threads Dimension X/Y/Z
|
||||
inline constexpr uint32_t GPUGPU_DISPATCHDIMX = 0x2500;
|
||||
inline constexpr uint32_t GPUGPU_DISPATCHDIMY = 0x2504;
|
||||
inline constexpr uint32_t GPUGPU_DISPATCHDIMZ = 0x2508;
|
||||
inline constexpr uint32_t gpgpuDispatchDimX = 0x2500;
|
||||
inline constexpr uint32_t gpgpuDispatchDimY = 0x2504;
|
||||
inline constexpr uint32_t gpgpuDispatchDimZ = 0x2508;
|
||||
|
||||
inline constexpr uint32_t GPUGPU_DISPATCHDIM[3] = {GPUGPU_DISPATCHDIMX, GPUGPU_DISPATCHDIMY, GPUGPU_DISPATCHDIMZ};
|
||||
inline constexpr uint32_t gpgpuDispatchDim[3] = {gpgpuDispatchDimX, gpgpuDispatchDimY, gpgpuDispatchDimZ};
|
||||
|
||||
inline constexpr uint32_t CS_GPR_R0 = 0x2600;
|
||||
inline constexpr uint32_t CS_GPR_R1 = 0x2608;
|
||||
inline constexpr uint32_t CS_GPR_R2 = 0x2610;
|
||||
inline constexpr uint32_t CS_GPR_R3 = 0x2618;
|
||||
inline constexpr uint32_t CS_GPR_R4 = 0x2620;
|
||||
inline constexpr uint32_t CS_GPR_R5 = 0x2628;
|
||||
inline constexpr uint32_t CS_GPR_R6 = 0x2630;
|
||||
inline constexpr uint32_t CS_GPR_R7 = 0x2638;
|
||||
inline constexpr uint32_t CS_GPR_R8 = 0x2640;
|
||||
inline constexpr uint32_t CS_GPR_R9 = 0x2648;
|
||||
inline constexpr uint32_t CS_GPR_R10 = 0x2650;
|
||||
inline constexpr uint32_t CS_GPR_R11 = 0x2658;
|
||||
inline constexpr uint32_t CS_GPR_R12 = 0x2660;
|
||||
inline constexpr uint32_t CS_GPR_R13 = 0x2668;
|
||||
inline constexpr uint32_t CS_GPR_R14 = 0x2670;
|
||||
inline constexpr uint32_t CS_GPR_R15 = 0x2678;
|
||||
inline constexpr uint32_t csGprR0 = 0x2600;
|
||||
inline constexpr uint32_t csGprR1 = 0x2608;
|
||||
inline constexpr uint32_t csGprR2 = 0x2610;
|
||||
inline constexpr uint32_t csGprR3 = 0x2618;
|
||||
inline constexpr uint32_t csGprR4 = 0x2620;
|
||||
inline constexpr uint32_t csGprR5 = 0x2628;
|
||||
inline constexpr uint32_t csGprR6 = 0x2630;
|
||||
inline constexpr uint32_t csGprR7 = 0x2638;
|
||||
inline constexpr uint32_t csGprR8 = 0x2640;
|
||||
inline constexpr uint32_t csGprR9 = 0x2648;
|
||||
inline constexpr uint32_t csGprR10 = 0x2650;
|
||||
inline constexpr uint32_t csGprR11 = 0x2658;
|
||||
inline constexpr uint32_t csGprR12 = 0x2660;
|
||||
inline constexpr uint32_t csGprR13 = 0x2668;
|
||||
inline constexpr uint32_t csGprR14 = 0x2670;
|
||||
inline constexpr uint32_t csGprR15 = 0x2678;
|
||||
|
||||
inline constexpr uint32_t CS_PREDICATE_RESULT = 0x2418;
|
||||
inline constexpr uint32_t CS_PREDICATE_RESULT_2 = 0x23BC;
|
||||
inline constexpr uint32_t csPredicateResult = 0x2418;
|
||||
inline constexpr uint32_t csPredicateResult2 = 0x23BC;
|
||||
|
||||
inline constexpr uint32_t semaWaitPoll = 0x0224c;
|
||||
|
||||
inline constexpr uint32_t gpThreadTimeRegAddressOffsetLow = 0x23A8;
|
||||
|
||||
inline constexpr uint32_t globalTimestampLdw = 0x2358;
|
||||
inline constexpr uint32_t globalTimestampUn = 0x235c;
|
||||
} // namespace RegisterOffsets
|
||||
|
||||
inline constexpr uint32_t SEMA_WAIT_POLL = 0x0224c;
|
||||
// Alu opcodes
|
||||
inline constexpr uint32_t NUM_ALU_INST_FOR_READ_MODIFY_WRITE = 4;
|
||||
|
||||
enum class AluRegisters : uint32_t {
|
||||
OPCODE_NONE = 0x000,
|
||||
OPCODE_FENCE_RD = 0x001,
|
||||
@@ -85,8 +94,3 @@ enum class AluRegisters : uint32_t {
|
||||
R_ZF = 0x32,
|
||||
R_CF = 0x33
|
||||
};
|
||||
|
||||
inline constexpr uint32_t GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW = 0x23A8;
|
||||
|
||||
inline constexpr uint32_t REG_GLOBAL_TIMESTAMP_LDW = 0x2358;
|
||||
inline constexpr uint32_t REG_GLOBAL_TIMESTAMP_UN = 0x235c;
|
||||
|
||||
@@ -583,7 +583,7 @@ bool IoctlHelper::getGemTiling(void *setTiling) {
|
||||
|
||||
bool getGpuTime32(::NEO::Drm &drm, uint64_t *timestamp) {
|
||||
RegisterRead reg = {};
|
||||
reg.offset = REG_GLOBAL_TIMESTAMP_LDW;
|
||||
reg.offset = RegisterOffsets::globalTimestampLdw;
|
||||
|
||||
if (drm.ioctl(DrmIoctl::RegRead, &reg)) {
|
||||
return false;
|
||||
@@ -594,7 +594,7 @@ bool getGpuTime32(::NEO::Drm &drm, uint64_t *timestamp) {
|
||||
|
||||
bool getGpuTime36(::NEO::Drm &drm, uint64_t *timestamp) {
|
||||
RegisterRead reg = {};
|
||||
reg.offset = REG_GLOBAL_TIMESTAMP_LDW | 1;
|
||||
reg.offset = RegisterOffsets::globalTimestampLdw | 1;
|
||||
|
||||
if (drm.ioctl(DrmIoctl::RegRead, &reg)) {
|
||||
return false;
|
||||
@@ -609,8 +609,8 @@ bool getGpuTimeSplitted(::NEO::Drm &drm, uint64_t *timestamp) {
|
||||
uint64_t tmpHi;
|
||||
int err = 0, loop = 3;
|
||||
|
||||
regHi.offset = REG_GLOBAL_TIMESTAMP_UN;
|
||||
regLo.offset = REG_GLOBAL_TIMESTAMP_LDW;
|
||||
regHi.offset = RegisterOffsets::globalTimestampUn;
|
||||
regLo.offset = RegisterOffsets::globalTimestampLdw;
|
||||
|
||||
err += drm.ioctl(DrmIoctl::RegRead, &regHi);
|
||||
do {
|
||||
@@ -631,10 +631,10 @@ void IoctlHelper::initializeGetGpuTimeFunction() {
|
||||
RegisterRead reg = {};
|
||||
int err;
|
||||
|
||||
reg.offset = (REG_GLOBAL_TIMESTAMP_LDW | 1);
|
||||
reg.offset = (RegisterOffsets::globalTimestampLdw | 1);
|
||||
err = this->ioctl(DrmIoctl::RegRead, &reg);
|
||||
if (err) {
|
||||
reg.offset = REG_GLOBAL_TIMESTAMP_UN;
|
||||
reg.offset = RegisterOffsets::globalTimestampUn;
|
||||
err = this->ioctl(DrmIoctl::RegRead, &reg);
|
||||
if (err) {
|
||||
this->getGpuTime = &getGpuTime32;
|
||||
|
||||
@@ -80,12 +80,12 @@ bool verifyIncrementOrDecrement(void *cmds, AluRegisters aluRegister, bool incre
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(cmds);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 1)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR7, 1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -190,7 +190,7 @@ bool verifyBaseConditionalBbStart(void *cmd, CompareOperation compareOperation,
|
||||
}
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++miAluCmd);
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_PREDICATE_RESULT_2, CS_GPR_R7)) {
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, RegisterOffsets::csPredicateResult2, RegisterOffsets::csGprR7)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -229,7 +229,7 @@ bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(cmd);
|
||||
if ((lrmCmd->getRegisterAddress() != CS_GPR_R7) || (lrmCmd->getMemoryAddress() != compareAddress)) {
|
||||
if ((lrmCmd->getRegisterAddress() != RegisterOffsets::csGprR7) || (lrmCmd->getMemoryAddress() != compareAddress)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -237,25 +237,25 @@ bool verifyConditionalDataMemBbStart(void *cmd, uint64_t startAddress, uint64_t
|
||||
|
||||
if (qwordData) {
|
||||
lrmCmd++;
|
||||
if ((lrmCmd->getRegisterAddress() != CS_GPR_R7 + 4) || (lrmCmd->getMemoryAddress() != compareAddress + 4)) {
|
||||
if ((lrmCmd->getRegisterAddress() != RegisterOffsets::csGprR7 + 4) || (lrmCmd->getMemoryAddress() != compareAddress + 4)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
} else {
|
||||
auto lriCmd2 = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd2, CS_GPR_R7 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd2, RegisterOffsets::csGprR7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = ++lriCmd2;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8, getLowPart(compareData))) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR8, getLowPart(compareData))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, qwordData ? getHighPart(compareData) : 0)) {
|
||||
if (!verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8 + 4, qwordData ? getHighPart(compareData) : 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -269,22 +269,22 @@ bool verifyConditionalDataRegBbStart(void *cmds, uint64_t startAddress, uint32_t
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(cmds);
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R7, compareReg)) {
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, RegisterOffsets::csGprR7, compareReg)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8, compareData)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR8, compareData)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR8 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -299,22 +299,22 @@ bool verifyConditionalRegMemBbStart(void *cmds, uint64_t startAddress, uint64_t
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(cmds);
|
||||
if ((lrmCmd->getRegisterAddress() != CS_GPR_R7) || (lrmCmd->getMemoryAddress() != compareAddress)) {
|
||||
if ((lrmCmd->getRegisterAddress() != RegisterOffsets::csGprR7) || (lrmCmd->getMemoryAddress() != compareAddress)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++lriCmd);
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R8, compareReg)) {
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, RegisterOffsets::csGprR8, compareReg)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
if (!verifyLri<FamilyType>(lriCmd, CS_GPR_R8 + 4, 0)) {
|
||||
if (!verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR8 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -179,7 +179,7 @@ HWTEST2_P(L0DebuggerBBlevelParameterizedTest, GivenNonZeroSbaAddressesWhenProgra
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
|
||||
auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lri->getRegisterOffset());
|
||||
|
||||
itor = find<MI_MATH *>(itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
@@ -287,7 +287,7 @@ HWTEST2_P(L0DebuggerBBlevelParameterizedTest, GivenOneNonZeroSbaAddressesWhenPro
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
|
||||
auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, lri->getRegisterOffset());
|
||||
|
||||
itor = find<MI_MATH *>(itor, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
|
||||
@@ -1089,44 +1089,44 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++miPredicate);
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(lrrCmd, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(++lrrCmd, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(++lrrCmd, 0, CS_GPR_R1, 0, CompareOperation::Equal, true)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(++lrrCmd, 0, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR2, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR2 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t removeTaskVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::removeTaskSectionStart;
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3, static_cast<uint32_t>(removeTaskVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(removeTaskVa >> 32))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(removeTaskVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t walkersLoopConditionCheckVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::tasksListLoopCheckSectionStart;
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR4, static_cast<uint32_t>(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(walkersLoopConditionCheckVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1138,19 +1138,19 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R6, 8)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR6, 8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR6 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1230,32 +1230,32 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
|
||||
lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(cmds, EncodeMathMMIO<FamilyType>::getCmdSizeForIncrementOrDecrement()));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(lrrCmd, RegisterOffsets::csGprR0, RegisterOffsets::csGprR9)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLrr<FamilyType>(++lrrCmd, RegisterOffsets::csGprR0 + 4, RegisterOffsets::csGprR9 + 4)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(++lrrCmd, 0, CS_GPR_R1, 0, CompareOperation::Equal, true)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(++lrrCmd, 0, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R7, 8)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR7, 8)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR7 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8, static_cast<uint32_t>(deferredTaskListVa & 0xFFFF'FFFFULL))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8 + 4, static_cast<uint32_t>(deferredTaskListVa >> 32))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1348,11 +1348,11 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(cmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalRegRegBatchBufferStart()));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R2, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR2, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R2 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR2 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1364,25 +1364,25 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R1, expectedQueueSizeLimit, CompareOperation::GreaterOrEqual, false)) {
|
||||
RegisterOffsets::csGprR1, expectedQueueSizeLimit, CompareOperation::GreaterOrEqual, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(conditionalBbStartcmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
RegisterOffsets::csGprR5, 1, CompareOperation::Equal, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 6. Scheduler loop check section
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false)));
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionSize))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR10, static_cast<uint32_t>(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionSize))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R10 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR10 + 4, 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1414,7 +1414,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalRegMemBbStart<FamilyType>(++miAluCmd, 0, semaphoreGpuVa, CS_GPR_R11, CompareOperation::GreaterOrEqual, true)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyConditionalRegMemBbStart<FamilyType>(++miAluCmd, 0, semaphoreGpuVa, RegisterOffsets::csGprR11, CompareOperation::GreaterOrEqual, true)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1453,15 +1453,15 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin
|
||||
|
||||
uint64_t semaphoreSectionVa = schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection<FamilyType>::semaphoreSectionStart;
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R11, semaphoreValue)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR11, semaphoreValue)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR9, static_cast<uint32_t>(semaphoreSectionVa & 0xFFFF'FFFFULL))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32))) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR9 + 4, static_cast<uint32_t>(semaphoreSectionVa >> 32))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1491,7 +1491,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin
|
||||
}
|
||||
|
||||
lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++miPredicate);
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R5, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR5, 0)) {
|
||||
continue;
|
||||
}
|
||||
lriCmd++;
|
||||
@@ -1669,21 +1669,21 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize
|
||||
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
if (auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it)) {
|
||||
if (CS_GPR_R1 == lriCmd->getRegisterOffset()) {
|
||||
if (RegisterOffsets::csGprR1 == lriCmd->getRegisterOffset()) {
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R1 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR1 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R5, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR5, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
lriCmd++;
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R5 + 4, 0)) {
|
||||
if (!RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR5 + 4, 0)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1770,23 +1770,23 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(taskStoreSection);
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R6, static_cast<uint32_t>(deferredTasksVa & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR6, static_cast<uint32_t>(deferredTasksVa & 0xFFFF'FFFFULL)));
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R6 + 4, static_cast<uint32_t>(deferredTasksVa >> 32)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR6 + 4, static_cast<uint32_t>(deferredTasksVa >> 32)));
|
||||
|
||||
EXPECT_NE(0u, batchBuffer.taskStartAddress);
|
||||
|
||||
uint32_t taskStartAddressLow = static_cast<uint32_t>(batchBuffer.taskStartAddress & 0xFFFF'FFFFULL);
|
||||
EXPECT_NE(0u, taskStartAddressLow);
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R7, taskStartAddressLow));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR7, taskStartAddressLow));
|
||||
|
||||
uint32_t taskStartHigh = static_cast<uint32_t>(batchBuffer.taskStartAddress >> 32);
|
||||
EXPECT_NE(0u, taskStartHigh);
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R7 + 4, taskStartHigh));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR7 + 4, taskStartHigh));
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8, 8));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8, 8));
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R8 + 4, 0));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR8 + 4, 0));
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
EXPECT_EQ(8u, miMathCmd->DW0.BitField.DwordLength);
|
||||
@@ -1898,7 +1898,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, false));
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
@@ -1910,7 +1910,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
|
||||
if (lriCmd) {
|
||||
if (CS_GPR_R5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
if (RegisterOffsets::csGprR5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
@@ -1956,7 +1956,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
|
||||
if (lriCmd) {
|
||||
if (CS_GPR_R5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
if (RegisterOffsets::csGprR5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
@@ -1999,7 +1999,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false);
|
||||
uint64_t expectedJumpAddress = directSubmission.ringCommandStream.getGpuBase() + offset + jumpOffset;
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, CS_GPR_R1, 0, CompareOperation::Equal, false));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart<FamilyType>(startAddress, expectedJumpAddress, RegisterOffsets::csGprR1, 0, CompareOperation::Equal, false));
|
||||
|
||||
HardwareParse hwParse;
|
||||
hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, offset + EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
@@ -2011,7 +2011,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
|
||||
if (lriCmd) {
|
||||
if (CS_GPR_R5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
if (RegisterOffsets::csGprR5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
@@ -2056,7 +2056,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissio
|
||||
for (auto &it : hwParse.cmdList) {
|
||||
lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(it);
|
||||
if (lriCmd) {
|
||||
if (CS_GPR_R5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
if (RegisterOffsets::csGprR5 == lriCmd->getRegisterOffset() && lriCmd->getDataDword() == 1) {
|
||||
success = true;
|
||||
break;
|
||||
}
|
||||
@@ -2164,12 +2164,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBcsRelaxedOrderingEnabledWh
|
||||
ultCsr->programEndingCmd(commandStream, &endingPtr, true, true);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(commandStream.getCpuBase());
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R3);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR3);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R3 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR3 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto bbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(++lrrCmd);
|
||||
EXPECT_EQ(1u, bbStartCmd->getIndirectAddressEnable());
|
||||
@@ -2220,12 +2220,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenProgrammingEndingCmdsThenSet
|
||||
ultCsr->programEndingCmd(commandStream, &endingPtr, true, true);
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(commandStream.getCpuBase());
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R3);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR3);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), CS_GPR_R3 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), CS_GPR_R0 + 4);
|
||||
EXPECT_EQ(lrrCmd->getSourceRegisterAddress(), RegisterOffsets::csGprR3 + 4);
|
||||
EXPECT_EQ(lrrCmd->getDestinationRegisterAddress(), RegisterOffsets::csGprR0 + 4);
|
||||
|
||||
auto bbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(++lrrCmd);
|
||||
EXPECT_EQ(1u, bbStartCmd->getIndirectAddressEnable());
|
||||
@@ -2270,12 +2270,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkloadSectionTh
|
||||
uint64_t returnPtr = directSubmission.ringCommandStream.getGpuBase() + offset + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + directSubmission.getSizeStartSection();
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32)));
|
||||
|
||||
uint64_t returnPtr2 = returnPtr + RelaxedOrderingHelper::getSizeTaskStoreSection<FamilyType>();
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(returnPtr2 & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtr2 & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
||||
|
||||
EXPECT_EQ(0, memcmp(&originalBbStart, batchBuffer.endCmdPtr, sizeof(MI_BATCH_BUFFER_START)));
|
||||
}
|
||||
@@ -2685,11 +2685,11 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenSchedulerRequiredWhenDispat
|
||||
directSubmission.dispatchRelaxedOrderingReturnPtrRegs(directSubmission.ringCommandStream, returnPtr);
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(ptrOffset(directSubmission.ringCommandStream.getCpuBase(), offset));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, CS_GPR_R4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R4 + 4, static_cast<uint32_t>(returnPtr >> 32)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(lriCmd, RegisterOffsets::csGprR4, static_cast<uint32_t>(returnPtr & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR4 + 4, static_cast<uint32_t>(returnPtr >> 32)));
|
||||
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3, static_cast<uint32_t>(returnPtr2 & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, CS_GPR_R3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3, static_cast<uint32_t>(returnPtr2 & 0xFFFF'FFFFULL)));
|
||||
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyLri<FamilyType>(++lriCmd, RegisterOffsets::csGprR3 + 4, static_cast<uint32_t>(returnPtr2 >> 32)));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenReturnPtrsRequiredWhenAskingForDispatchSizeTheAddMmioSizes, IsAtLeastXeHpcCore) {
|
||||
|
||||
@@ -147,13 +147,13 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
|
||||
ASSERT_NE(commands.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(regOffset, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR13, cmdLoadReg->getDestinationRegisterAddress());
|
||||
|
||||
// load immVal to R14
|
||||
itor++;
|
||||
ASSERT_NE(commands.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR14, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(immVal, cmdLoadImm->getDataDword());
|
||||
|
||||
// encodeAluAnd should have its own unit tests, so we only check
|
||||
@@ -167,7 +167,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterCommandEncoderTest, givenOffsetAndValue
|
||||
itor++;
|
||||
ASSERT_NE(commands.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR15, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(dstAddress, cmdMem->getMemoryAddress());
|
||||
EXPECT_TRUE(cmdMem->getWorkloadPartitionIdOffsetEnable());
|
||||
}
|
||||
|
||||
@@ -167,7 +167,7 @@ HWTEST_F(CommandEncoderMathTest, WhenReservingCommandThenBitfieldSetCorrectly) {
|
||||
EXPECT_EQ(cmdMATH->DW0.BitField.InstructionOpcode,
|
||||
static_cast<uint32_t>(MI_MATH::MI_COMMAND_OPCODE_MI_MATH));
|
||||
EXPECT_EQ(cmdMATH->DW0.BitField.DwordLength,
|
||||
static_cast<uint32_t>(NUM_ALU_INST_FOR_READ_MODIFY_WRITE - 1));
|
||||
static_cast<uint32_t>(RegisterConstants::numAluInstForReadModifyWrite - 1));
|
||||
}
|
||||
|
||||
HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCalledThenContainerHasCorrectMathCommands) {
|
||||
@@ -194,13 +194,13 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
|
||||
EXPECT_NE(commands.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdLoadReg->getSourceRegisterAddress(), regOffset);
|
||||
EXPECT_EQ(cmdLoadReg->getDestinationRegisterAddress(), CS_GPR_R13);
|
||||
EXPECT_EQ(cmdLoadReg->getDestinationRegisterAddress(), RegisterOffsets::csGprR13);
|
||||
|
||||
// load immVal to R14
|
||||
itor++;
|
||||
EXPECT_NE(commands.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(cmdLoadImm->getRegisterOffset(), CS_GPR_R14);
|
||||
EXPECT_EQ(cmdLoadImm->getRegisterOffset(), RegisterOffsets::csGprR14);
|
||||
EXPECT_EQ(cmdLoadImm->getDataDword(), immVal);
|
||||
|
||||
// encodeAluAnd should have its own unit tests, so we only check
|
||||
@@ -214,7 +214,7 @@ HWTEST_F(CommandEncoderMathTest, givenOffsetAndValueWhenEncodeBitwiseAndValIsCal
|
||||
itor++;
|
||||
EXPECT_NE(commands.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmdMem->getRegisterAddress(), CS_GPR_R15);
|
||||
EXPECT_EQ(cmdMem->getRegisterAddress(), RegisterOffsets::csGprR15);
|
||||
EXPECT_EQ(cmdMem->getMemoryAddress(), dstAddress);
|
||||
}
|
||||
|
||||
@@ -299,11 +299,11 @@ HWTEST_F(CommandEncodeAluTests, whenProgrammingIncrementOperationThenUseCorrectA
|
||||
EXPECT_EQ(bufferSize, cmdStream.getUsed());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(buffer);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R7);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), RegisterOffsets::csGprR7);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), 1u);
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
@@ -346,11 +346,11 @@ HWTEST_F(CommandEncodeAluTests, whenProgrammingDecrementOperationThenUseCorrectA
|
||||
EXPECT_EQ(bufferSize, cmdStream.getUsed());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(buffer);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), CS_GPR_R7);
|
||||
EXPECT_EQ(lriCmd->getRegisterOffset(), RegisterOffsets::csGprR7);
|
||||
EXPECT_EQ(lriCmd->getDataDword(), 1u);
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
|
||||
@@ -38,14 +38,14 @@ HWTEST2_F(XeHPAndLaterCommandEncoderMathTest, WhenAppendsAGreaterThanThenPredica
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R0, cmdMEM->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, cmdMEM->getRegisterAddress());
|
||||
EXPECT_EQ(0xDEADBEEFCAF0u, cmdMEM->getMemoryAddress());
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdIMM->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR1, cmdIMM->getRegisterOffset());
|
||||
EXPECT_EQ(17u, cmdIMM->getDataDword());
|
||||
EXPECT_TRUE(cmdIMM->getMmioRemapEnable());
|
||||
|
||||
@@ -59,8 +59,8 @@ HWTEST2_F(XeHPAndLaterCommandEncoderMathTest, WhenAppendsAGreaterThanThenPredica
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), RegisterOffsets::csGprR2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult);
|
||||
|
||||
auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
|
||||
EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode,
|
||||
|
||||
@@ -65,8 +65,8 @@ struct EncodeConditionalBatchBufferStartTest : public ::testing::Test {
|
||||
}
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(++miAluCmd);
|
||||
EXPECT_EQ(CS_PREDICATE_RESULT_2, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R7, lrrCmd->getSourceRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csPredicateResult2, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
auto predicateCmd = reinterpret_cast<MI_SET_PREDICATE *>(++lrrCmd);
|
||||
if (compareOperation == CompareOperation::Equal) {
|
||||
@@ -112,19 +112,19 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataM
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(compareAddress, lrmCmd->getMemoryAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(compareData, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
@@ -157,19 +157,19 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDa
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrmCmd = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(compareAddress, lrmCmd->getMemoryAddress());
|
||||
|
||||
lrmCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lrmCmd->getRegisterAddress());
|
||||
EXPECT_EQ(compareAddress + 4, lrmCmd->getMemoryAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrmCmd);
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max()), lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData >> 32), lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
@@ -189,7 +189,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(false));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint32_t compareReg = CS_GPR_R1;
|
||||
constexpr uint32_t compareReg = RegisterOffsets::csGprR1;
|
||||
constexpr uint32_t compareData = 9876;
|
||||
|
||||
for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
|
||||
@@ -202,19 +202,19 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgrammingConditionalDataR
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(compareReg, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(compareData, lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(0u, lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
@@ -234,7 +234,7 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDa
|
||||
EXPECT_EQ(expectedSize, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart(true));
|
||||
|
||||
constexpr uint64_t startAddress = 0x12340000;
|
||||
constexpr uint32_t compareReg = CS_GPR_R1;
|
||||
constexpr uint32_t compareReg = RegisterOffsets::csGprR1;
|
||||
constexpr uint64_t compareData = 0x12345678'12345678;
|
||||
|
||||
for (auto compareOperation : {CompareOperation::Equal, CompareOperation::NotEqual, CompareOperation::GreaterOrEqual}) {
|
||||
@@ -247,19 +247,19 @@ HWTEST2_F(EncodeConditionalBatchBufferStartTest, whenProgramming64bConditionalDa
|
||||
EXPECT_EQ(expectedSize, cmdStream.getUsed());
|
||||
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(buffer);
|
||||
EXPECT_EQ(CS_GPR_R7, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(compareReg, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
lrrCmd++;
|
||||
EXPECT_EQ(CS_GPR_R7 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR7 + 4, lrrCmd->getDestinationRegisterAddress());
|
||||
EXPECT_EQ(compareReg + 4, lrrCmd->getSourceRegisterAddress());
|
||||
|
||||
auto lriCmd = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(++lrrCmd);
|
||||
EXPECT_EQ(CS_GPR_R8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData & std::numeric_limits<uint32_t>::max()), lriCmd->getDataDword());
|
||||
|
||||
lriCmd++;
|
||||
EXPECT_EQ(CS_GPR_R8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR8 + 4, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(static_cast<uint32_t>(compareData >> 32), lriCmd->getDataDword());
|
||||
|
||||
validateBaseProgramming<FamilyType>(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8);
|
||||
|
||||
@@ -40,14 +40,14 @@ GEN11TEST_F(CommandEncoderMathTestGen11, WhenAppendsAGreaterThanThenPredicateCor
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
EXPECT_EQ(cmdMEM->getMemoryAddress(), 0xDEADBEEFCAF0u);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), CS_GPR_R1);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), RegisterOffsets::csGprR1);
|
||||
EXPECT_EQ(cmdIMM->getDataDword(), 17u);
|
||||
|
||||
itor = find<MI_MATH *>(itor, commands.end());
|
||||
@@ -60,8 +60,8 @@ GEN11TEST_F(CommandEncoderMathTestGen11, WhenAppendsAGreaterThanThenPredicateCor
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), RegisterOffsets::csGprR2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult);
|
||||
|
||||
auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
|
||||
EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode,
|
||||
|
||||
@@ -38,14 +38,14 @@ GEN12LPTEST_F(CommandEncoderMathTestGen12Lp, WhenAppendsAGreaterThanThenPredicat
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R0, cmdMEM->getRegisterAddress());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR0, cmdMEM->getRegisterAddress());
|
||||
EXPECT_EQ(0xDEADBEEFCAF0u, cmdMEM->getMemoryAddress());
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdIMM->getRegisterOffset());
|
||||
EXPECT_EQ(RegisterOffsets::csGprR1, cmdIMM->getRegisterOffset());
|
||||
EXPECT_EQ(17u, cmdIMM->getDataDword());
|
||||
EXPECT_TRUE(cmdIMM->getMmioRemapEnable());
|
||||
|
||||
@@ -59,8 +59,8 @@ GEN12LPTEST_F(CommandEncoderMathTestGen12Lp, WhenAppendsAGreaterThanThenPredicat
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), RegisterOffsets::csGprR2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult);
|
||||
|
||||
auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
|
||||
EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode,
|
||||
|
||||
@@ -40,14 +40,14 @@ GEN8TEST_F(CommandEncoderMathTestGen8, WhenAppendsAGreaterThanThenPredicateCorre
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
EXPECT_EQ(cmdMEM->getMemoryAddress(), 0xDEADBEEFCAF0u);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), CS_GPR_R1);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), RegisterOffsets::csGprR1);
|
||||
EXPECT_EQ(cmdIMM->getDataDword(), 17u);
|
||||
|
||||
itor = find<MI_MATH *>(itor, commands.end());
|
||||
@@ -60,8 +60,8 @@ GEN8TEST_F(CommandEncoderMathTestGen8, WhenAppendsAGreaterThanThenPredicateCorre
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), RegisterOffsets::csGprR2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult);
|
||||
|
||||
auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
|
||||
EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode,
|
||||
|
||||
@@ -40,14 +40,14 @@ GEN9TEST_F(CommandEncoderMathTestGen9, WhenAppendsAGreaterThanThenPredicateCorre
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdMEM = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), CS_GPR_R0);
|
||||
EXPECT_EQ(cmdMEM->getRegisterAddress(), RegisterOffsets::csGprR0);
|
||||
EXPECT_EQ(cmdMEM->getMemoryAddress(), 0xDEADBEEFCAF0u);
|
||||
|
||||
itor = find<MI_LOAD_REGISTER_IMM *>(itor, commands.end());
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdIMM = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), CS_GPR_R1);
|
||||
EXPECT_EQ(cmdIMM->getRegisterOffset(), RegisterOffsets::csGprR1);
|
||||
EXPECT_EQ(cmdIMM->getDataDword(), 17u);
|
||||
|
||||
itor = find<MI_MATH *>(itor, commands.end());
|
||||
@@ -60,8 +60,8 @@ GEN9TEST_F(CommandEncoderMathTestGen9, WhenAppendsAGreaterThanThenPredicateCorre
|
||||
ASSERT_NE(itor, commands.end());
|
||||
|
||||
auto cmdREG = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), CS_GPR_R2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), CS_PREDICATE_RESULT);
|
||||
EXPECT_EQ(cmdREG->getSourceRegisterAddress(), RegisterOffsets::csGprR2);
|
||||
EXPECT_EQ(cmdREG->getDestinationRegisterAddress(), RegisterOffsets::csPredicateResult);
|
||||
|
||||
auto cmdALU = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(cmdMATH + 3);
|
||||
EXPECT_EQ(cmdALU->DW0.BitField.ALUOpcode,
|
||||
|
||||
Reference in New Issue
Block a user