Set up partition registers once per context

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-11-17 19:51:43 +00:00
committed by Compute-Runtime-Automation
parent c6c27ed328
commit f56773d166
17 changed files with 173 additions and 165 deletions

View File

@@ -19,7 +19,6 @@ namespace ult {
using MultiPartitionPrologueTest = Test<DeviceFixture>;
HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@@ -41,27 +40,22 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itorPc = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), itorLrm);
auto lrmCmdPc = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::wparidCCSOffset, lrmCmdPc->getRegisterAddress());
ASSERT_EQ(lrmCmdPc->getMmioRemapEnable(), true);
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
itorPc = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto lriCmdPc = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmdPc->getRegisterOffset()));
ASSERT_EQ(static_cast<uint32_t>(lriCmdPc->getDataDword()), dataPartitionSize);
ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true);
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
EXPECT_EQ(dataPartitionSize, static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(true, lriCmd->getMmioRemapEnable());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
auto &commandContainer = commandList->commandContainer;
@@ -81,7 +75,6 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
}
using MultiPartitionEpilogueTest = Test<DeviceFixture>;
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
ze_result_t returnValue;
@@ -101,16 +94,16 @@ HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledTh
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itorPc = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
auto lriCmdPc = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmdPc->getRegisterOffset()));
ASSERT_EQ(static_cast<uint32_t>(lriCmdPc->getDataDword()), CommonConstants::partitionAddressOffset);
ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true);
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
EXPECT_EQ(CommonConstants::partitionAddressOffset, static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(true, lriCmd->getMmioRemapEnable());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {

View File

@@ -674,7 +674,7 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
commandQueue->destroy();
}
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident) {
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(1);
@@ -691,6 +691,7 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
bool expectedGAWasMadeResident = false;
};
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
EXPECT_EQ(2u, csr.activePartitions);
csr.initializeTagAllocation();
csr.createWorkPartitionAllocation(*neoDevice);
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -712,13 +713,14 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
EXPECT_EQ(2u, csr.activePartitionsConfig);
ASSERT_NE(nullptr, workPartitionAllocation);
EXPECT_TRUE(csr.expectedGAWasMadeResident);
commandQueue->destroy();
}
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated) {
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(0);
@@ -2109,26 +2111,5 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
commandQueue->destroy();
}
// Checks that partition-register programming is a no-op for this test's platform
// selection: the reported programming size is zero and calling
// programPartitionConfiguration() does not advance the command stream.
// NOTE(review): the platform set is whatever CommandQueueSBASupport matches - defined outside this view.
HWTEST2_F(CommandQueueSynchronizeTest, givenBasePlatformsWhenProgrammingPartitionRegistersThenExpectNoAction, CommandQueueSBASupport) {
    constexpr size_t expectedSize = 0;

    ze_command_queue_desc_t queueDesc = {};
    auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto queue = new MockCommandQueueHw<gfxCoreFamily>(device, defaultCsr, &queueDesc);
    ze_result_t initStatus = queue->initialize(false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initStatus);

    // The size estimate has to agree with what is actually emitted below.
    EXPECT_EQ(expectedSize, queue->getPartitionProgrammingSize());

    // Measure stream usage on both sides of the programming call.
    auto &stream = *queue->commandStream;
    size_t usedBefore = stream.getUsed();
    queue->programPartitionConfiguration(stream);
    size_t usedAfter = stream.getUsed();
    EXPECT_EQ(expectedSize, usedAfter - usedBefore);

    // The queue was created with a raw new; destroy() is the release call this test uses.
    queue->destroy();
}
} // namespace ult
} // namespace L0

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/utilities/software_tags_manager.h"
@@ -823,11 +824,41 @@ HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsAndCommandL
EXPECT_TRUE(tagFound);
}
// Scans a parsed command list for the two partition-register writes and checks
// their presence against the caller's expectation:
//  - an MI_LOAD_REGISTER_MEM whose register address equals PartitionRegisters::wparidCCSOffset
//  - an MI_LOAD_REGISTER_IMM whose register offset equals PartitionRegisters::addressOffsetCCSOffset
//
// cmdList      - parsed commands to inspect.
// expectToFind - true: both writes must be present; false: neither may be present.
//
// Failures are reported through non-fatal EXPECT_* checks, so nothing is returned.
template <typename GfxFamily>
void findPartitionRegister(GenCmdList &cmdList, bool expectToFind) {
    using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    // Every MI_LOAD_REGISTER_MEM is examined; one match is enough to set the flag.
    bool wparidProgrammed = false;
    auto lrmCommands = findAll<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    for (auto &lrmIt : lrmCommands) {
        auto lrm = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*lrmIt);
        if (NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset == lrm->getRegisterAddress()) {
            wparidProgrammed = true;
        }
    }

    // Same scan for MI_LOAD_REGISTER_IMM, compared against the address-offset register.
    bool addressOffsetProgrammed = false;
    auto lriCommands = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    for (auto &lriIt : lriCommands) {
        auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*lriIt);
        if (NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset == lri->getRegisterOffset()) {
            addressOffsetProgrammed = true;
        }
    }

    if (!expectToFind) {
        EXPECT_FALSE(wparidProgrammed);
        EXPECT_FALSE(addressOffsetProgrammed);
        return;
    }
    EXPECT_TRUE(wparidProgrammed);
    EXPECT_TRUE(addressOffsetProgrammed);
}
HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation, IsAtLeastXeHpCore) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using PARSE = typename FamilyType::PARSE;
ze_command_queue_desc_t desc{};
@@ -846,6 +877,8 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
false,
returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(2u, commandQueue->partitionCount);
ASSERT_NE(nullptr, commandQueue->commandStream);
auto &commandStreamReceiver = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
@@ -858,28 +891,34 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
EXPECT_EQ(1u, fence->partitionCount);
ze_fence_handle_t fenceHandle = fence->toHandle();
ASSERT_NE(nullptr, commandQueue->commandStream);
fence->partitionCount = 2;
//1st execute call initialized pipeline
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
//1st call then initialize registers
GenCmdList cmdList;
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
findPartitionRegister<FamilyType>(cmdList, true);
usedSpaceBefore = commandQueue->commandStream->getUsed();
result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
usedSpaceAfter = commandQueue->commandStream->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(1u, fence->partitionCount);
auto workPartitionAddress = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
EXPECT_EQ(2u, fence->partitionCount);
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(commandLists[i]);
commandList->partitionCount = 2;
}
cmdList.clear();
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
findPartitionRegister<FamilyType>(cmdList, false);
usedSpaceBefore = commandQueue->commandStream->getUsed();
result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
@@ -888,24 +927,12 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
size_t cmdBufferSizeWithtMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(2u, fence->partitionCount);
size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM);
size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming;
EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithtMmioProgramming);
GenCmdList cmdList;
cmdList.clear();
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLrm);
auto loadRegisterImm = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(0x23B4u, loadRegisterImm->getRegisterOffset());
EXPECT_EQ(8u, loadRegisterImm->getDataDword());
auto loadRegisterMem = static_cast<MI_LOAD_REGISTER_MEM *>(*itorLrm);
EXPECT_EQ(0x221Cu, loadRegisterMem->getRegisterAddress());
EXPECT_EQ(workPartitionAddress, loadRegisterMem->getMemoryAddress());
findPartitionRegister<FamilyType>(cmdList, false);
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());