Limit preemption programming in level zero command queues

- Skip redundant preemption programming when multiple command queues use the same context and the preemption state is retained
- Do not program preemption for copy command queues
- Program the preemption preamble only for mid-thread preemption

Related-To: NEO-7187

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-07-27 15:35:15 +00:00
committed by Compute-Runtime-Automation
parent 17f22990e6
commit 0c301e5e99
11 changed files with 368 additions and 94 deletions

View File

@ -51,14 +51,9 @@ struct CommandQueue : _ze_command_queue_handle_t {
// Exposes this queue object as its API handle; CommandQueue derives from _ze_command_queue_handle_t, so `this` is returned directly.
ze_command_queue_handle_t toHandle() { return this; }
// Overwrites the preemption mode tracked by this command queue with `newPreemptionMode`.
// Only the stored member is updated here; nothing else is touched by this call.
void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) {
    this->commandQueuePreemptionMode = newPreemptionMode;
}
// Read-only accessor for the isCopyOnlyCommandQueue flag of this queue.
bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; }
protected:
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
uint32_t partitionCount = 1;
uint32_t activeSubDevices = 1;
bool preemptionCmdSyncProgramming = true;

View File

@ -124,26 +124,23 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
NEO::Device *neoDevice = device->getNEODevice();
auto devicePreemption = device->getDevicePreemptionMode();
const bool initialPreemptionMode = commandQueuePreemptionMode == NEO::PreemptionMode::Initial;
NEO::PreemptionMode cmdQueuePreemption = commandQueuePreemptionMode;
if (initialPreemptionMode) {
cmdQueuePreemption = devicePreemption;
}
NEO::PreemptionMode statePreemption = cmdQueuePreemption;
auto contextPreemptionMode = csr->getPreemptionMode();
const bool initialPreemptionMode = contextPreemptionMode == NEO::PreemptionMode::Initial;
NEO::PreemptionMode statePreemption = contextPreemptionMode;
const bool stateSipRequired = (initialPreemptionMode && devicePreemption == NEO::PreemptionMode::MidThread) ||
(neoDevice->getDebugger() && NEO::Debugger::isDebugEnabled(internalUsage));
if (initialPreemptionMode) {
preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice);
}
if (!isCopyOnlyCommandQueue) {
if (initialPreemptionMode) {
preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice);
}
if (stateSipRequired) {
preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, csr->isRcs());
if (stateSipRequired) {
preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, csr->isRcs());
}
}
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(devicePreemption, commandQueuePreemptionMode);
if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) {
if (neoDevice->getSourceLevelDebugger() != nullptr) {
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(true);
@ -184,25 +181,27 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
if (preemptionCmdSyncProgramming) {
preemptionSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
if (!isCopyOnlyCommandQueue) {
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
if (preemptionCmdSyncProgramming) {
preemptionSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
statePreemption = commandListPreemption;
}
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
statePreemption = commandListPreemption;
}
perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
for (auto element : commandList->commandContainer.sshAllocations) {
heapContainer.push_back(element);
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
}
for (auto element : commandList->commandContainer.sshAllocations) {
heapContainer.push_back(element);
}
}
}
@ -343,15 +342,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice, csr->getLogicalStateHelper());
}
if (cmdQueuePreemption != commandQueuePreemptionMode) {
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
cmdQueuePreemption,
commandQueuePreemptionMode,
csr->getPreemptionAllocation());
}
statePreemption = cmdQueuePreemption;
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
(neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));
@ -383,33 +373,35 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
csr->getLogicalStateHelper()->writeStreamInline(child, false);
}
statePreemption = contextPreemptionMode;
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
auto cmdBufferCount = cmdBufferAllocations.size();
bool immediateMode = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::PipeControlReasonTag>(
child,
*neoDevice,
"ComandList Preemption Mode update", 0u);
}
if (preemptionCmdSyncProgramming) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(child, args);
}
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
commandListPreemption,
statePreemption,
csr->getPreemptionAllocation());
statePreemption = commandListPreemption;
}
if (!isCopyOnlyCommandQueue) {
auto commandListPreemption = commandList->getCommandListPreemptionMode();
if (statePreemption != commandListPreemption) {
if (NEO::DebugManager.flags.EnableSWTags.get()) {
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::PipeControlReasonTag>(
child,
*neoDevice,
"ComandList Preemption Mode update", 0u);
}
if (preemptionCmdSyncProgramming) {
NEO::PipeControlArgs args;
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(child, args);
}
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
commandListPreemption,
statePreemption,
csr->getPreemptionAllocation());
statePreemption = commandListPreemption;
}
bool programVfe = frontEndStateDirty;
if (isPatchingVfeStateAllowed) {
auto &requiredStreamState = commandList->getRequiredStreamState();
@ -453,7 +445,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
NEO::PreemptionHelper::programStateSipEndWa<GfxFamily>(child, *neoDevice);
}
commandQueuePreemptionMode = statePreemption;
csr->setPreemptionMode(statePreemption);
if (hFence) {
fence = Fence::fromHandle(hFence);

View File

@ -33,7 +33,6 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::synchronizeByPollingForTaskCount;
using BaseClass::taskCount;
using CommandQueue::activeSubDevices;
using CommandQueue::commandQueuePreemptionMode;
using CommandQueue::internalUsage;
using CommandQueue::partitionCount;

View File

@ -122,7 +122,6 @@ HWTEST_TEMPLATED_F(AubCsrTest, givenAubCsrWhenCallingExecuteCommandListsThenPoll
auto aubCsr = static_cast<NEO::UltAubCommandStreamReceiver<FamilyType> *>(csr);
CommandQueue *queue = static_cast<CommandQueue *>(L0::CommandQueue::fromHandle(commandQueue));
queue->setCommandQueuePreemptionMode(PreemptionMode::Disabled);
EXPECT_EQ(aubCsr->pollForCompletionCalled, 0u);
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));

View File

@ -407,13 +407,16 @@ HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAr
desc.index = 0u;
desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
std::array<bool, 2> testedInternalFlags = {true, false};
for (auto flagInternal : testedInternalFlags) {
ze_result_t returnValue;
currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial);
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
currentCsr,
&desc,
false,
flagInternal,
@ -458,13 +461,16 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsA
desc.index = 0u;
desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
std::array<bool, 2> testedInternalFlags = {true, false};
for (auto flagInternal : testedInternalFlags) {
ze_result_t returnValue;
currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial);
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
currentCsr,
&desc,
false,
flagInternal,
@ -645,10 +651,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(
productFamily,
device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
device, currentCsr, &desc, false, false, returnValue));
ASSERT_NE(nullptr, commandQueue->commandStream);
commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming;
preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u;
@ -670,12 +678,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
result = commandQueue->synchronize(0);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode());
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode());
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
@ -693,7 +701,16 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
auto preemptionMode = neoDevice->getPreemptionMode();
GenCmdList::iterator itor = cmdList.begin();
GenCmdList::iterator itorStateSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
GenCmdList::iterator itorCsrCmd = NEO::UnitTestHelper<FamilyType>::findMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
if (preemptionMode == NEO::PreemptionMode::MidThread) {
EXPECT_NE(itorCsrCmd, cmdList.end());
itor = itorCsrCmd;
} else {
EXPECT_EQ(itorCsrCmd, cmdList.end());
}
GenCmdList::iterator itorStateSip = find<STATE_SIP *>(itor, cmdList.end());
if (preemptionMode == NEO::PreemptionMode::MidThread) {
EXPECT_NE(itorStateSip, cmdList.end());
@ -702,38 +719,52 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
EXPECT_EQ(itorStateSip, cmdList.end());
}
constexpr uint32_t registerOffset = 0x2580;
constexpr uint32_t disabledPreemptionRegisterData = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
constexpr uint32_t threadGroupPreemptionRegisterData = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
// MMIO programming of 1st disabled preemption command list: initial->disabled
MI_LOAD_REGISTER_IMM *lriCmd = nullptr;
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
if (preemptionCmdProgramming) {
EXPECT_NE(itorLri, cmdList.end());
//Initial cmdQ preemption
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
if (commandQueue->preemptionCmdSyncProgramming) {
EXPECT_NE(itorPipeControl, itorLri);
} else {
EXPECT_EQ(itorPipeControl, itorLri);
}
itor = itorLri;
} else {
EXPECT_EQ(itorLri, cmdList.end());
}
uint32_t data = 0;
//next should be BB_START to 1st Disabled preemption Cmd List
// next should be BB_START to 1st disabled preemption Cmd List
auto itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
// MMIO programming of thread-group preemption command list: disabled->thread-group
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
if (preemptionCmdProgramming) {
EXPECT_NE(itorLri, cmdList.end());
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
EXPECT_EQ(data, lriCmd->getDataDword());
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword());
//verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
if (commandQueue->preemptionCmdSyncProgramming) {
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
EXPECT_NE(itorPipeControl, cmdList.end());
EXPECT_NE(itorPipeControl, itorLri);
} else {
EXPECT_EQ(itorPipeControl, itorLri);
}
itor = itorLri;
@ -741,23 +772,25 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
EXPECT_EQ(itorLri, cmdList.end());
}
//start of thread-group command list
// start of thread-group preemption Cmd List
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
// MMIO programming of 2nd disabled preemption command list: thread-group->disabled
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
if (preemptionCmdProgramming) {
EXPECT_NE(itorLri, cmdList.end());
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
EXPECT_EQ(data, lriCmd->getDataDword());
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
//verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
if (commandQueue->preemptionCmdSyncProgramming) {
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
EXPECT_NE(itorPipeControl, cmdList.end());
EXPECT_NE(itorPipeControl, itorLri);
} else {
EXPECT_EQ(itorPipeControl, itorLri);
}
itor = itorLri;
@ -765,29 +798,114 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
EXPECT_EQ(itorLri, cmdList.end());
}
//start of thread-group command list
// start of 2nd disabled preemption command list
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
// BB end
auto itorBBEnd = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
// BB end or ULLS BB start
if (currentCsr->isDirectSubmissionEnabled()) {
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
} else {
auto itorBBEnd = find<MI_BATCH_BUFFER_END *>(itor, cmdList.end());
EXPECT_NE(itorBBEnd, cmdList.end());
itor = itorBBEnd;
}
GenCmdList::iterator firstExecListItor = itor;
// second execution of command lists:
// BB_START to 1st disabled preemption Cmd List
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
// no MMIO programming prior 1st disabled cmd list, since command queue retains disabled preemption state
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, itorBBStart);
EXPECT_EQ(itorLri, itorBBStart);
itor = itorBBStart;
// MMIO programming of thread-group preemption command list: disabled->thread-group
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
if (preemptionCmdProgramming) {
EXPECT_NE(itorLri, cmdList.end());
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword());
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
if (commandQueue->preemptionCmdSyncProgramming) {
EXPECT_NE(itorPipeControl, itorLri);
} else {
EXPECT_EQ(itorPipeControl, itorLri);
}
itor = itorLri;
} else {
EXPECT_EQ(itorLri, cmdList.end());
}
// start of thread-group preemption Cmd List
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
// MMIO programming of 2nd disabled preemption command list: thread-group->disabled
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
if (preemptionCmdProgramming) {
EXPECT_NE(itorLri, cmdList.end());
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
if (commandQueue->preemptionCmdSyncProgramming) {
EXPECT_NE(itorPipeControl, itorLri);
} else {
EXPECT_EQ(itorPipeControl, itorLri);
}
itor = itorLri;
} else {
EXPECT_EQ(itorLri, cmdList.end());
}
// start of 2nd disabled preemption command list
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
itor = itorBBStart;
// BB end or ULLS BB start
if (currentCsr->isDirectSubmissionEnabled()) {
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
EXPECT_NE(itorBBStart, cmdList.end());
} else {
auto itorBBEnd = find<MI_BATCH_BUFFER_END *>(itor, cmdList.end());
EXPECT_NE(itorBBEnd, cmdList.end());
}
auto allCsrCmds = NEO::UnitTestHelper<FamilyType>::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
if (preemptionMode == NEO::PreemptionMode::MidThread) {
EXPECT_EQ(1u, allStateSips.size());
EXPECT_EQ(1u, allCsrCmds.size());
} else {
EXPECT_EQ(0u, allStateSips.size());
EXPECT_EQ(0u, allCsrCmds.size());
}
auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), itorBBEnd, 0x2580u);
size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u;
auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), firstExecListItor, registerOffset);
size_t expectedMmioCount = preemptionCmdProgramming ? 3u : 0u;
EXPECT_EQ(expectedMmioCount, firstExecMmioCount);
// Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec
// and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it
auto secondExecMmioCount = countMmio<FamilyType>(itorBBEnd, cmdList.end(), 0x2580u);
auto secondExecMmioCount = countMmio<FamilyType>(firstExecListItor, cmdList.end(), registerOffset);
expectedMmioCount = preemptionCmdProgramming ? 2u : 0u;
EXPECT_EQ(expectedMmioCount, secondExecMmioCount);
@ -804,6 +922,60 @@ HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionM
twoCommandListCommandPreemptionTest<FamilyType>(true);
}
// Executes a copy command list on a copy-only command queue and checks that no preemption
// programming reaches the queue's command stream:
//  - no MI_LOAD_REGISTER_IMM targeting the preemption register (0x2580),
//  - no STATE_SIP,
//  - no mid-thread preemption allocation (CSR base address) command,
// and that the preemption mode tracked by the CSR stays Initial afterwards.
HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListThenExpectNoPreemptionProgramming) {
    using STATE_SIP = typename FamilyType::STATE_SIP;

    constexpr uint32_t preemptionRegisterOffset = 0x2580;

    ze_result_t returnValue;
    std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    // force command list to have preemption state to verify this state is not used during execution
    commandList->commandListPreemptionMode = NEO::PreemptionMode::MidThread;

    auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
    EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode());

    const ze_command_queue_desc_t desc{};
    // 5th argument `true` requests a copy-only queue; confirmed by the peekIsCopyOnlyCommandQueue() check below
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
                                                          device,
                                                          currentCsr,
                                                          &desc,
                                                          true,
                                                          false,
                                                          returnValue));
    ASSERT_NE(nullptr, commandQueue);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_TRUE(commandQueue->peekIsCopyOnlyCommandQueue());

    // core-API handle type, matching what executeCommandLists consumes
    ze_command_list_handle_t cmdListHandle = commandList->toHandle();
    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

    // parse everything the queue has written to its command stream so far
    size_t usedSpaceAfter = commandQueue->commandStream->getUsed();
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        commandQueue->commandStream->getCpuBase(),
        usedSpaceAfter));

    size_t preemptionMmioCount = countMmio<FamilyType>(cmdList.begin(), cmdList.end(), preemptionRegisterOffset);
    constexpr size_t expectedMmioCount = 0;
    EXPECT_EQ(expectedMmioCount, preemptionMmioCount);

    auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, allStateSips.size());

    auto allCsrCmds = NEO::UnitTestHelper<FamilyType>::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, allCsrCmds.size());

    // the copy queue must leave the CSR preemption state untouched
    EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode());

    commandQueue->destroy();
}
struct CommandQueueExecuteCommandListSWTagsTests : public Test<DeviceFixture> {
void SetUp() override {
DebugManager.flags.EnableSWTags.set(true);

View File

@ -183,5 +183,80 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS
commandQueue2->destroy();
}
using IsMmioPreemptionUsed = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_XE_HPC_CORE>;
// Two command queues are created on the same (default engine) CSR and each executes the same
// command list, whose preemption mode is forced to ThreadGroup. The first execution is expected
// to emit exactly one MI_LOAD_REGISTER_IMM to the preemption register (0x2580); the second queue
// is expected to emit none.
HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingSingleCsrWhenExecutingFirstTimeOnBothQueuesThenPreemptionModeIsProgrammedOnce, IsMmioPreemptionUsed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    constexpr uint32_t preemptionRegisterOffset = 0x2580;

    // Counts MI_LOAD_REGISTER_IMM commands in `commands` that write to `registerOffset`.
    auto countPreemptionMmio = [](GenCmdList &commands, uint32_t registerOffset) -> size_t {
        size_t found = 0;
        for (auto it : findAll<MI_LOAD_REGISTER_IMM *>(commands.begin(), commands.end())) {
            auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            if (cmd->getRegisterOffset() == registerOffset) {
                found++;
            }
        }
        return found;
    };

    ze_result_t returnValue;
    // validate the created object before dereferencing it to obtain the handle
    auto commandListObject = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandListObject);
    ze_command_list_handle_t commandList = commandListObject->toHandle();
    CommandList::fromHandle(commandList)->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

    // first queue: the switch to ThreadGroup has to be programmed
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    auto usedSpaceBefore = commandQueue->commandStream->getUsed();
    returnValue = commandQueue->executeCommandLists(1, &commandList, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    // parse only the bytes added by this execution
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore),
        usedSpaceAfter - usedSpaceBefore));

    constexpr size_t expectedFirstPreemptionMmioCount = 1u;
    EXPECT_EQ(expectedFirstPreemptionMmioCount, countPreemptionMmio(cmdList, preemptionRegisterOffset));

    cmdList.clear();

    // second queue on the same CSR: no preemption MMIO is expected in its stream
    auto commandQueue2 = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    ASSERT_NE(nullptr, commandQueue2->commandStream);

    usedSpaceBefore = commandQueue2->commandStream->getUsed();
    returnValue = commandQueue2->executeCommandLists(1, &commandList, nullptr, false);
    ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
    usedSpaceAfter = commandQueue2->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(commandQueue2->commandStream->getCpuBase(), usedSpaceBefore),
        usedSpaceAfter - usedSpaceBefore));

    constexpr size_t expectedSecondPreemptionMmioCount = 0u;
    EXPECT_EQ(expectedSecondPreemptionMmioCount, countPreemptionMmio(cmdList, preemptionRegisterOffset));

    CommandList::fromHandle(commandList)->destroy();
    commandQueue->destroy();
    commandQueue2->destroy();
}
} // namespace ult
} // namespace L0

View File

@ -348,6 +348,14 @@ class CommandStreamReceiver {
isPreambleSent = value;
}
// Returns the value currently held in lastPreemptionMode (the mode last stored through setPreemptionMode()).
PreemptionMode getPreemptionMode() const {
    return this->lastPreemptionMode;
}
// Stores `value` in lastPreemptionMode; this call only updates the member and performs no other work.
void setPreemptionMode(PreemptionMode value) {
    this->lastPreemptionMode = value;
}
protected:
void cleanupResources();
void printDeviceIndex();

View File

@ -7,6 +7,7 @@
#pragma once
#include "shared/source/helpers/aux_translation.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include <cstddef>
#include <vector>
@ -82,6 +83,10 @@ struct UnitTestHelper {
static bool timestampRegisterHighAddress();
static void validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr);
static GenCmdList::iterator findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end);
static std::vector<GenCmdList::iterator> findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end);
};
} // namespace NEO

View File

@ -77,4 +77,14 @@ inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCm
return false;
}
// Searches [begin, end) for a GPGPU_CSR_BASE_ADDRESS command of the given family and
// forwards the iterator produced by find<>.
template <typename GfxFamily>
GenCmdList::iterator UnitTestHelper<GfxFamily>::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
    using CsrBaseAddressCmd = typename GfxFamily::GPGPU_CSR_BASE_ADDRESS;
    return find<CsrBaseAddressCmd *>(begin, end);
}
// Collects iterators to the GPGPU_CSR_BASE_ADDRESS commands in [begin, end) by delegating to findAll<>.
template <typename GfxFamily>
std::vector<GenCmdList::iterator> UnitTestHelper<GfxFamily>::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
    using CsrBaseAddressCmd = typename GfxFamily::GPGPU_CSR_BASE_ADDRESS;
    return findAll<CsrBaseAddressCmd *>(begin, end);
}
} // namespace NEO

View File

@ -105,4 +105,15 @@ inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCm
return storeRegisterMem.getWorkloadPartitionIdOffsetEnable();
}
// Variant of the helper that never reports a match: `begin` is ignored and `end` is returned unconditionally.
// NOTE(review): presumably the families using this implementation have no dedicated mid-thread
// preemption allocation command to look for — confirm against the family's command set.
template <typename GfxFamily>
GenCmdList::iterator UnitTestHelper<GfxFamily>::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
return end;
}
// Variant of the helper that never reports matches: both arguments are ignored and an empty
// list of iterators is returned.
template <typename GfxFamily>
std::vector<GenCmdList::iterator> UnitTestHelper<GfxFamily>::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
    return std::vector<GenCmdList::iterator>{};
}
} // namespace NEO

View File

@ -2103,3 +2103,11 @@ TEST_F(CommandStreamReceiverTest, givenPreambleFlagIsSetWhenGettingFlagStateThen
commandStreamReceiver->setPreambleSetFlag(true);
EXPECT_TRUE(commandStreamReceiver->getPreambleSetFlag());
}
// Checks the preemption-mode accessors of the command stream receiver: the fixture's receiver is
// expected to report Initial before any update, and the newly stored mode after setPreemptionMode().
TEST_F(CommandStreamReceiverTest, givenPreemptionSentIsInitialWhenSettingPreemptionToNewModeThenExpectCorrectPreemption) {
    const PreemptionMode initialMode = PreemptionMode::Initial;
    EXPECT_EQ(initialMode, commandStreamReceiver->getPreemptionMode());

    const PreemptionMode requestedMode = PreemptionMode::ThreadGroup;
    commandStreamReceiver->setPreemptionMode(requestedMode);
    EXPECT_EQ(requestedMode, commandStreamReceiver->getPreemptionMode());
}