mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Limit preemption programming in level zero command queues
Preemption programming is limited when multiple command queues use the same context and retain the same state. There is no preemption programming for copy command queues. The preemption preamble is programmed only for mid-thread preemption. Related-To: NEO-7187 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
17f22990e6
commit
0c301e5e99
@ -51,14 +51,9 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
||||
|
||||
ze_command_queue_handle_t toHandle() { return this; }
|
||||
|
||||
// Overwrites the queue's stored preemption mode (commandQueuePreemptionMode)
// with newPreemptionMode. No validation or command programming happens here.
void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) {
|
||||
commandQueuePreemptionMode = newPreemptionMode;
|
||||
}
|
||||
|
||||
// Read-only accessor: returns the value of the isCopyOnlyCommandQueue flag.
bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; }
|
||||
|
||||
protected:
|
||||
NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial;
|
||||
uint32_t partitionCount = 1;
|
||||
uint32_t activeSubDevices = 1;
|
||||
bool preemptionCmdSyncProgramming = true;
|
||||
|
@ -124,26 +124,23 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
auto devicePreemption = device->getDevicePreemptionMode();
|
||||
const bool initialPreemptionMode = commandQueuePreemptionMode == NEO::PreemptionMode::Initial;
|
||||
NEO::PreemptionMode cmdQueuePreemption = commandQueuePreemptionMode;
|
||||
if (initialPreemptionMode) {
|
||||
cmdQueuePreemption = devicePreemption;
|
||||
}
|
||||
NEO::PreemptionMode statePreemption = cmdQueuePreemption;
|
||||
auto contextPreemptionMode = csr->getPreemptionMode();
|
||||
const bool initialPreemptionMode = contextPreemptionMode == NEO::PreemptionMode::Initial;
|
||||
NEO::PreemptionMode statePreemption = contextPreemptionMode;
|
||||
|
||||
const bool stateSipRequired = (initialPreemptionMode && devicePreemption == NEO::PreemptionMode::MidThread) ||
|
||||
(neoDevice->getDebugger() && NEO::Debugger::isDebugEnabled(internalUsage));
|
||||
|
||||
if (initialPreemptionMode) {
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice);
|
||||
}
|
||||
if (!isCopyOnlyCommandQueue) {
|
||||
if (initialPreemptionMode) {
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize<GfxFamily>(*neoDevice);
|
||||
}
|
||||
|
||||
if (stateSipRequired) {
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, csr->isRcs());
|
||||
if (stateSipRequired) {
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, csr->isRcs());
|
||||
}
|
||||
}
|
||||
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(devicePreemption, commandQueuePreemptionMode);
|
||||
|
||||
if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) {
|
||||
if (neoDevice->getSourceLevelDebugger() != nullptr) {
|
||||
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(true);
|
||||
@ -184,25 +181,27 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
|
||||
totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size();
|
||||
spaceForResidency += commandList->commandContainer.getResidencyContainer().size();
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
if (preemptionCmdSyncProgramming) {
|
||||
preemptionSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
if (!isCopyOnlyCommandQueue) {
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
if (preemptionCmdSyncProgramming) {
|
||||
preemptionSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
}
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(commandListPreemption, statePreemption);
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
|
||||
perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
|
||||
perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize());
|
||||
|
||||
perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
|
||||
perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize());
|
||||
|
||||
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
|
||||
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
|
||||
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
|
||||
}
|
||||
for (auto element : commandList->commandContainer.sshAllocations) {
|
||||
heapContainer.push_back(element);
|
||||
if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) {
|
||||
if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) {
|
||||
heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation());
|
||||
}
|
||||
for (auto element : commandList->commandContainer.sshAllocations) {
|
||||
heapContainer.push_back(element);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -343,15 +342,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
NEO::PreemptionHelper::programStateSip<GfxFamily>(child, *neoDevice, csr->getLogicalStateHelper());
|
||||
}
|
||||
|
||||
if (cmdQueuePreemption != commandQueuePreemptionMode) {
|
||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||
cmdQueuePreemption,
|
||||
commandQueuePreemptionMode,
|
||||
csr->getPreemptionAllocation());
|
||||
}
|
||||
|
||||
statePreemption = cmdQueuePreemption;
|
||||
|
||||
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
|
||||
(neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage));
|
||||
|
||||
@ -383,33 +373,35 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
csr->getLogicalStateHelper()->writeStreamInline(child, false);
|
||||
}
|
||||
|
||||
statePreemption = contextPreemptionMode;
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; ++i) {
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[i]);
|
||||
auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
|
||||
auto cmdBufferCount = cmdBufferAllocations.size();
|
||||
bool immediateMode = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
|
||||
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::PipeControlReasonTag>(
|
||||
child,
|
||||
*neoDevice,
|
||||
"ComandList Preemption Mode update", 0u);
|
||||
}
|
||||
|
||||
if (preemptionCmdSyncProgramming) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(child, args);
|
||||
}
|
||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||
commandListPreemption,
|
||||
statePreemption,
|
||||
csr->getPreemptionAllocation());
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
|
||||
if (!isCopyOnlyCommandQueue) {
|
||||
auto commandListPreemption = commandList->getCommandListPreemptionMode();
|
||||
if (statePreemption != commandListPreemption) {
|
||||
if (NEO::DebugManager.flags.EnableSWTags.get()) {
|
||||
neoDevice->getRootDeviceEnvironment().tagsManager->insertTag<GfxFamily, NEO::SWTags::PipeControlReasonTag>(
|
||||
child,
|
||||
*neoDevice,
|
||||
"ComandList Preemption Mode update", 0u);
|
||||
}
|
||||
|
||||
if (preemptionCmdSyncProgramming) {
|
||||
NEO::PipeControlArgs args;
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(child, args);
|
||||
}
|
||||
NEO::PreemptionHelper::programCmdStream<GfxFamily>(child,
|
||||
commandListPreemption,
|
||||
statePreemption,
|
||||
csr->getPreemptionAllocation());
|
||||
statePreemption = commandListPreemption;
|
||||
}
|
||||
|
||||
bool programVfe = frontEndStateDirty;
|
||||
if (isPatchingVfeStateAllowed) {
|
||||
auto &requiredStreamState = commandList->getRequiredStreamState();
|
||||
@ -453,7 +445,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
NEO::PreemptionHelper::programStateSipEndWa<GfxFamily>(child, *neoDevice);
|
||||
}
|
||||
|
||||
commandQueuePreemptionMode = statePreemption;
|
||||
csr->setPreemptionMode(statePreemption);
|
||||
|
||||
if (hFence) {
|
||||
fence = Fence::fromHandle(hFence);
|
||||
|
@ -33,7 +33,6 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using BaseClass::synchronizeByPollingForTaskCount;
|
||||
using BaseClass::taskCount;
|
||||
using CommandQueue::activeSubDevices;
|
||||
using CommandQueue::commandQueuePreemptionMode;
|
||||
using CommandQueue::internalUsage;
|
||||
using CommandQueue::partitionCount;
|
||||
|
||||
|
@ -122,7 +122,6 @@ HWTEST_TEMPLATED_F(AubCsrTest, givenAubCsrWhenCallingExecuteCommandListsThenPoll
|
||||
|
||||
auto aubCsr = static_cast<NEO::UltAubCommandStreamReceiver<FamilyType> *>(csr);
|
||||
CommandQueue *queue = static_cast<CommandQueue *>(L0::CommandQueue::fromHandle(commandQueue));
|
||||
queue->setCommandQueuePreemptionMode(PreemptionMode::Disabled);
|
||||
EXPECT_EQ(aubCsr->pollForCompletionCalled, 0u);
|
||||
|
||||
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
|
||||
|
@ -407,13 +407,16 @@ HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAr
|
||||
desc.index = 0u;
|
||||
desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
|
||||
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
std::array<bool, 2> testedInternalFlags = {true, false};
|
||||
|
||||
for (auto flagInternal : testedInternalFlags) {
|
||||
ze_result_t returnValue;
|
||||
currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial);
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
currentCsr,
|
||||
&desc,
|
||||
false,
|
||||
flagInternal,
|
||||
@ -458,13 +461,16 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsA
|
||||
desc.index = 0u;
|
||||
desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
|
||||
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
std::array<bool, 2> testedInternalFlags = {true, false};
|
||||
|
||||
for (auto flagInternal : testedInternalFlags) {
|
||||
ze_result_t returnValue;
|
||||
currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial);
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
currentCsr,
|
||||
&desc,
|
||||
false,
|
||||
flagInternal,
|
||||
@ -645,10 +651,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
|
||||
desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
|
||||
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(
|
||||
productFamily,
|
||||
device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue));
|
||||
device, currentCsr, &desc, false, false, returnValue));
|
||||
ASSERT_NE(nullptr, commandQueue->commandStream);
|
||||
commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming;
|
||||
preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize<FamilyType>(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u;
|
||||
@ -670,12 +678,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
result = commandQueue->synchronize(0);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
|
||||
EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode());
|
||||
|
||||
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode);
|
||||
EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
@ -693,7 +701,16 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
auto preemptionMode = neoDevice->getPreemptionMode();
|
||||
GenCmdList::iterator itor = cmdList.begin();
|
||||
|
||||
GenCmdList::iterator itorStateSip = find<STATE_SIP *>(cmdList.begin(), cmdList.end());
|
||||
GenCmdList::iterator itorCsrCmd = NEO::UnitTestHelper<FamilyType>::findMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
|
||||
if (preemptionMode == NEO::PreemptionMode::MidThread) {
|
||||
EXPECT_NE(itorCsrCmd, cmdList.end());
|
||||
|
||||
itor = itorCsrCmd;
|
||||
} else {
|
||||
EXPECT_EQ(itorCsrCmd, cmdList.end());
|
||||
}
|
||||
|
||||
GenCmdList::iterator itorStateSip = find<STATE_SIP *>(itor, cmdList.end());
|
||||
if (preemptionMode == NEO::PreemptionMode::MidThread) {
|
||||
EXPECT_NE(itorStateSip, cmdList.end());
|
||||
|
||||
@ -702,38 +719,52 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
EXPECT_EQ(itorStateSip, cmdList.end());
|
||||
}
|
||||
|
||||
constexpr uint32_t registerOffset = 0x2580;
|
||||
constexpr uint32_t disabledPreemptionRegisterData = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
|
||||
constexpr uint32_t threadGroupPreemptionRegisterData = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
|
||||
|
||||
// MMIO programming of 1st disabled preemption command list: initial->disabled
|
||||
MI_LOAD_REGISTER_IMM *lriCmd = nullptr;
|
||||
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
|
||||
if (preemptionCmdProgramming) {
|
||||
EXPECT_NE(itorLri, cmdList.end());
|
||||
//Initial cmdQ preemption
|
||||
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
|
||||
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
|
||||
|
||||
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
if (commandQueue->preemptionCmdSyncProgramming) {
|
||||
EXPECT_NE(itorPipeControl, itorLri);
|
||||
} else {
|
||||
EXPECT_EQ(itorPipeControl, itorLri);
|
||||
}
|
||||
|
||||
itor = itorLri;
|
||||
} else {
|
||||
EXPECT_EQ(itorLri, cmdList.end());
|
||||
}
|
||||
|
||||
uint32_t data = 0;
|
||||
//next should be BB_START to 1st Disabled preemption Cmd List
|
||||
// next should be BB_START to 1st disabled preemption Cmd List
|
||||
auto itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
|
||||
// MMIO programming of thread-group preemption command list: disabled->thread-group
|
||||
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
|
||||
if (preemptionCmdProgramming) {
|
||||
EXPECT_NE(itorLri, cmdList.end());
|
||||
|
||||
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
|
||||
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
|
||||
data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16);
|
||||
EXPECT_EQ(data, lriCmd->getDataDword());
|
||||
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword());
|
||||
|
||||
//verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
|
||||
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
if (commandQueue->preemptionCmdSyncProgramming) {
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
EXPECT_NE(itorPipeControl, cmdList.end());
|
||||
EXPECT_NE(itorPipeControl, itorLri);
|
||||
} else {
|
||||
EXPECT_EQ(itorPipeControl, itorLri);
|
||||
}
|
||||
|
||||
itor = itorLri;
|
||||
@ -741,23 +772,25 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
EXPECT_EQ(itorLri, cmdList.end());
|
||||
}
|
||||
|
||||
//start of thread-group command list
|
||||
// start of thread-group preemption Cmd List
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
|
||||
// MMIO programming of 2nd disabled preemption command list: thread-group->disabled
|
||||
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
|
||||
if (preemptionCmdProgramming) {
|
||||
EXPECT_NE(itorLri, cmdList.end());
|
||||
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
|
||||
EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset());
|
||||
data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16);
|
||||
EXPECT_EQ(data, lriCmd->getDataDword());
|
||||
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
|
||||
|
||||
//verify presence of sync PIPE_CONTROL just before LRI switching to thread-group
|
||||
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
if (commandQueue->preemptionCmdSyncProgramming) {
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
EXPECT_NE(itorPipeControl, cmdList.end());
|
||||
EXPECT_NE(itorPipeControl, itorLri);
|
||||
} else {
|
||||
EXPECT_EQ(itorPipeControl, itorLri);
|
||||
}
|
||||
|
||||
itor = itorLri;
|
||||
@ -765,29 +798,114 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p
|
||||
EXPECT_EQ(itorLri, cmdList.end());
|
||||
}
|
||||
|
||||
//start of thread-group command list
|
||||
// start of 2nd disabled preemption command list
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
|
||||
// BB end
|
||||
auto itorBBEnd = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
// BB end or ULLS BB start
|
||||
if (currentCsr->isDirectSubmissionEnabled()) {
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
} else {
|
||||
auto itorBBEnd = find<MI_BATCH_BUFFER_END *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBEnd, cmdList.end());
|
||||
itor = itorBBEnd;
|
||||
}
|
||||
|
||||
GenCmdList::iterator firstExecListItor = itor;
|
||||
|
||||
// second execution of command lists:
|
||||
|
||||
// BB_START to 1st disabled preemption Cmd List
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
|
||||
// no MMIO programming prior 1st disabled cmd list, since command queue retains disabled preemption state
|
||||
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, itorBBStart);
|
||||
EXPECT_EQ(itorLri, itorBBStart);
|
||||
itor = itorBBStart;
|
||||
|
||||
// MMIO programming of thread-group preemption command list: disabled->thread-group
|
||||
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
|
||||
if (preemptionCmdProgramming) {
|
||||
EXPECT_NE(itorLri, cmdList.end());
|
||||
|
||||
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
|
||||
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword());
|
||||
|
||||
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
if (commandQueue->preemptionCmdSyncProgramming) {
|
||||
EXPECT_NE(itorPipeControl, itorLri);
|
||||
} else {
|
||||
EXPECT_EQ(itorPipeControl, itorLri);
|
||||
}
|
||||
|
||||
itor = itorLri;
|
||||
} else {
|
||||
EXPECT_EQ(itorLri, cmdList.end());
|
||||
}
|
||||
|
||||
// start of thread-group preemption Cmd List
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
|
||||
// MMIO programming of 2nd disabled preemption command list: thread-group->disabled
|
||||
itorLri = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
|
||||
if (preemptionCmdProgramming) {
|
||||
EXPECT_NE(itorLri, cmdList.end());
|
||||
lriCmd = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
|
||||
EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset());
|
||||
EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword());
|
||||
|
||||
// verify presence of sync PIPE_CONTROL just before LRI switching preemption
|
||||
auto itorPipeControl = find<PIPE_CONTROL *>(itor, itorLri);
|
||||
if (commandQueue->preemptionCmdSyncProgramming) {
|
||||
EXPECT_NE(itorPipeControl, itorLri);
|
||||
} else {
|
||||
EXPECT_EQ(itorPipeControl, itorLri);
|
||||
}
|
||||
|
||||
itor = itorLri;
|
||||
} else {
|
||||
EXPECT_EQ(itorLri, cmdList.end());
|
||||
}
|
||||
|
||||
// start of 2nd disabled preemption command list
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
itor = itorBBStart;
|
||||
|
||||
// BB end or ULLS BB start
|
||||
if (currentCsr->isDirectSubmissionEnabled()) {
|
||||
itorBBStart = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBStart, cmdList.end());
|
||||
} else {
|
||||
auto itorBBEnd = find<MI_BATCH_BUFFER_END *>(itor, cmdList.end());
|
||||
EXPECT_NE(itorBBEnd, cmdList.end());
|
||||
}
|
||||
|
||||
auto allCsrCmds = NEO::UnitTestHelper<FamilyType>::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
|
||||
auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
|
||||
if (preemptionMode == NEO::PreemptionMode::MidThread) {
|
||||
EXPECT_EQ(1u, allStateSips.size());
|
||||
EXPECT_EQ(1u, allCsrCmds.size());
|
||||
} else {
|
||||
EXPECT_EQ(0u, allStateSips.size());
|
||||
EXPECT_EQ(0u, allCsrCmds.size());
|
||||
}
|
||||
|
||||
auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), itorBBEnd, 0x2580u);
|
||||
size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u;
|
||||
auto firstExecMmioCount = countMmio<FamilyType>(cmdList.begin(), firstExecListItor, registerOffset);
|
||||
size_t expectedMmioCount = preemptionCmdProgramming ? 3u : 0u;
|
||||
EXPECT_EQ(expectedMmioCount, firstExecMmioCount);
|
||||
|
||||
// Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec
|
||||
// and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it
|
||||
auto secondExecMmioCount = countMmio<FamilyType>(itorBBEnd, cmdList.end(), 0x2580u);
|
||||
auto secondExecMmioCount = countMmio<FamilyType>(firstExecListItor, cmdList.end(), registerOffset);
|
||||
expectedMmioCount = preemptionCmdProgramming ? 2u : 0u;
|
||||
EXPECT_EQ(expectedMmioCount, secondExecMmioCount);
|
||||
|
||||
@ -804,6 +922,60 @@ HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionM
|
||||
twoCommandListCommandPreemptionTest<FamilyType>(true);
|
||||
}
|
||||
|
||||
// Executes one Copy-engine command list on a copy-only command queue and checks
// that the resulting command stream contains no preemption-related programming:
// zero MMIO writes to offset 0x2580, zero STATE_SIP commands, and zero mid-thread
// preemption allocation commands. It also checks that the CSR's tracked
// preemption mode is still Initial after execution.
// NOTE(review): the MI_LOAD_REGISTER_IMM alias below is not referenced in the
// visible body of this test.
HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListThenExpectNoPreemptionProgramming) {
|
||||
using STATE_SIP = typename FamilyType::STATE_SIP;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
// register offset handed to countMmio when counting preemption MMIO writes
constexpr uint32_t preemptionRegisterOffset = 0x2580;
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
// force command list to have preemption state to verify this state is not used during execution
|
||||
commandList->commandListPreemptionMode = NEO::PreemptionMode::MidThread;
|
||||
|
||||
// precondition: the CSR has not recorded any preemption mode yet
auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode());
|
||||
|
||||
const ze_command_queue_desc_t desc{};
|
||||
// the `true` argument presumably selects a copy-only queue; this is confirmed
// right after creation via peekIsCopyOnlyCommandQueue()
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
currentCsr,
|
||||
&desc,
|
||||
true,
|
||||
false,
|
||||
returnValue));
|
||||
ASSERT_NE(nullptr, commandQueue);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
EXPECT_TRUE(commandQueue->peekIsCopyOnlyCommandQueue());
|
||||
|
||||
zet_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
size_t usedSpaceAfter = commandQueue->commandStream->getUsed();
|
||||
|
||||
// parse the queue's command stream from its base up to the used size
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
commandQueue->commandStream->getCpuBase(),
|
||||
usedSpaceAfter));
|
||||
|
||||
size_t preemptionMmioCount = countMmio<FamilyType>(cmdList.begin(), cmdList.end(), preemptionRegisterOffset);
|
||||
constexpr size_t expectedMmioCount = 0;
|
||||
EXPECT_EQ(expectedMmioCount, preemptionMmioCount);
|
||||
|
||||
auto allStateSips = findAll<STATE_SIP *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, allStateSips.size());
|
||||
|
||||
auto allCsrCmds = NEO::UnitTestHelper<FamilyType>::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, allCsrCmds.size());
|
||||
|
||||
// executing on the copy queue must leave the CSR's tracked preemption mode untouched
EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode());
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
struct CommandQueueExecuteCommandListSWTagsTests : public Test<DeviceFixture> {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.EnableSWTags.set(true);
|
||||
|
@ -183,5 +183,80 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS
|
||||
commandQueue2->destroy();
|
||||
}
|
||||
|
||||
using IsMmioPreemptionUsed = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_XE_HPC_CORE>;
|
||||
|
||||
// Creates two command queues on the same command stream receiver (the device's
// default engine CSR) and executes the same ThreadGroup-preemption command list
// once on each. The commands emitted by each execution are parsed separately:
// the first queue is expected to emit exactly one MI_LOAD_REGISTER_IMM to
// offset 0x2580, the second queue is expected to emit none.
HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingSingleCsrWhenExecutingFirstTimeOnBothQueuesThenPreemptionModeIsProgrammedOnce, IsMmioPreemptionUsed) {
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
|
||||
ze_result_t returnValue;
|
||||
|
||||
ze_command_list_handle_t commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle();
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
// give the command list a concrete preemption mode for the queues to act on
CommandList::fromHandle(commandList)->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup;
|
||||
|
||||
ze_command_queue_desc_t queueDesc = {};
|
||||
queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
ASSERT_NE(nullptr, commandQueue->commandStream);
|
||||
|
||||
// first queue: record stream usage around the execution so only the newly
// written commands are parsed
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
|
||||
returnValue = commandQueue->executeCommandLists(1, &commandList, nullptr, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore),
|
||||
usedSpaceAfter - usedSpaceBefore));
|
||||
|
||||
// count LRI commands that target register offset 0x2580
auto loadRegisterImmList = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
size_t foundPreemptionMmioCount = 0;
|
||||
for (auto it : loadRegisterImmList) {
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
|
||||
if (cmd->getRegisterOffset() == 0x2580) {
|
||||
foundPreemptionMmioCount++;
|
||||
}
|
||||
}
|
||||
|
||||
constexpr size_t expectedFirstPreemptionMmioCount = 1u;
|
||||
EXPECT_EQ(expectedFirstPreemptionMmioCount, foundPreemptionMmioCount);
|
||||
|
||||
// reset the parsed list and the counter before inspecting the second queue
cmdList.clear();
|
||||
foundPreemptionMmioCount = 0;
|
||||
|
||||
// second queue is created on the same CSR as the first one
auto commandQueue2 = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
ASSERT_NE(nullptr, commandQueue2->commandStream);
|
||||
|
||||
usedSpaceBefore = commandQueue2->commandStream->getUsed();
|
||||
returnValue = commandQueue2->executeCommandLists(1, &commandList, nullptr, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
usedSpaceAfter = commandQueue2->commandStream->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(commandQueue2->commandStream->getCpuBase(), usedSpaceBefore),
|
||||
usedSpaceAfter - usedSpaceBefore));
|
||||
|
||||
loadRegisterImmList = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
|
||||
for (auto it : loadRegisterImmList) {
|
||||
auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
|
||||
if (cmd->getRegisterOffset() == 0x2580) {
|
||||
foundPreemptionMmioCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// the same command list on the second queue must not add another 0x2580 write
constexpr size_t expectedSecondPreemptionMmioCount = 0u;
|
||||
EXPECT_EQ(expectedSecondPreemptionMmioCount, foundPreemptionMmioCount);
|
||||
|
||||
CommandList::fromHandle(commandList)->destroy();
|
||||
commandQueue->destroy();
|
||||
commandQueue2->destroy();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
@ -348,6 +348,14 @@ class CommandStreamReceiver {
|
||||
isPreambleSent = value;
|
||||
}
|
||||
|
||||
// Returns the preemption mode currently stored in lastPreemptionMode.
PreemptionMode getPreemptionMode() const {
|
||||
return lastPreemptionMode;
|
||||
}
|
||||
|
||||
// Stores value in lastPreemptionMode; no other state is touched here.
void setPreemptionMode(PreemptionMode value) {
|
||||
lastPreemptionMode = value;
|
||||
}
|
||||
|
||||
protected:
|
||||
void cleanupResources();
|
||||
void printDeviceIndex();
|
||||
|
@ -7,6 +7,7 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/aux_translation.h"
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
@ -82,6 +83,10 @@ struct UnitTestHelper {
|
||||
static bool timestampRegisterHighAddress();
|
||||
|
||||
static void validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr);
|
||||
|
||||
static GenCmdList::iterator findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end);
|
||||
|
||||
static std::vector<GenCmdList::iterator> findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end);
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -77,4 +77,14 @@ inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCm
|
||||
return false;
|
||||
}
|
||||
|
||||
// Searches [begin, end) for a GfxFamily::GPGPU_CSR_BASE_ADDRESS command by
// delegating to find<>. Callers in the tests compare the result against the
// list's end() to decide whether the command was found.
template <typename GfxFamily>
|
||||
GenCmdList::iterator UnitTestHelper<GfxFamily>::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
|
||||
return find<typename GfxFamily::GPGPU_CSR_BASE_ADDRESS *>(begin, end);
|
||||
}
|
||||
|
||||
// Collects iterators to GfxFamily::GPGPU_CSR_BASE_ADDRESS commands in
// [begin, end) by delegating to findAll<>.
template <typename GfxFamily>
|
||||
std::vector<GenCmdList::iterator> UnitTestHelper<GfxFamily>::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
|
||||
return findAll<typename GfxFamily::GPGPU_CSR_BASE_ADDRESS *>(begin, end);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -105,4 +105,15 @@ inline bool UnitTestHelper<GfxFamily>::getWorkloadPartitionForStoreRegisterMemCm
|
||||
return storeRegisterMem.getWorkloadPartitionIdOffsetEnable();
|
||||
}
|
||||
|
||||
// Variant that never reports a match: it ignores begin and returns end
// unconditionally. NOTE(review): presumably used for hardware families that
// have no dedicated preemption allocation command - confirm against the
// including file.
template <typename GfxFamily>
|
||||
GenCmdList::iterator UnitTestHelper<GfxFamily>::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
|
||||
return end;
|
||||
}
|
||||
|
||||
// Variant that never reports a match: both iterators are ignored and an empty
// vector is always returned.
template <typename GfxFamily>
|
||||
std::vector<GenCmdList::iterator> UnitTestHelper<GfxFamily>::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) {
|
||||
std::vector<GenCmdList::iterator> emptyList;
|
||||
return emptyList;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
@ -2103,3 +2103,11 @@ TEST_F(CommandStreamReceiverTest, givenPreambleFlagIsSetWhenGettingFlagStateThen
|
||||
commandStreamReceiver->setPreambleSetFlag(true);
|
||||
EXPECT_TRUE(commandStreamReceiver->getPreambleSetFlag());
|
||||
}
|
||||
|
||||
// Checks the CSR preemption-mode accessors: the fixture's receiver is expected
// to report PreemptionMode::Initial before any set call, and to report
// ThreadGroup after setPreemptionMode(ThreadGroup).
TEST_F(CommandStreamReceiverTest, givenPreemptionSentIsInitialWhenSettingPreemptionToNewModeThenExpectCorrectPreemption) {
|
||||
PreemptionMode mode = PreemptionMode::Initial;
|
||||
EXPECT_EQ(mode, commandStreamReceiver->getPreemptionMode());
|
||||
mode = PreemptionMode::ThreadGroup;
|
||||
commandStreamReceiver->setPreemptionMode(mode);
|
||||
EXPECT_EQ(mode, commandStreamReceiver->getPreemptionMode());
|
||||
}
|
||||
|
Reference in New Issue
Block a user