Set up partition registers once per context

Related-To: NEO-6262

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2021-11-17 19:51:43 +00:00
committed by Compute-Runtime-Automation
parent c6c27ed328
commit f56773d166
17 changed files with 173 additions and 165 deletions

View File

@@ -311,19 +311,14 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(z
// Appends the multi-partition prologue to this command list's command stream.
//
// Only the partition address-offset register is written here, loaded with
// partitionDataSize. The earlier text of this function declared
// estimatedSizeRequired twice in the same scope (ill-formed) and reserved
// only the offset-register size while also dispatching the full register
// configuration. The full partition register configuration is emitted by the
// command queue in executeCommandLists (csrHw->programActivePartitionConfig),
// so it is not repeated per command list.
//
// partitionDataSize: value passed to dispatchOffsetRegister for the offset register.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
    // Reserve exactly what dispatchOffsetRegister is going to emit.
    const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
    increaseCommandStreamSpace(estimatedSizeRequired);
    NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
                                                                    partitionDataSize);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
const size_t estimatedSizeRequired = NEO::ImplicitScalingDispatch<GfxFamily>::getOffsetRegisterSize();
increaseCommandStreamSpace(estimatedSizeRequired);
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),

View File

@@ -54,6 +54,9 @@ ze_result_t CommandQueueImp::initialize(bool copyOnly, bool isInternal) {
isCopyOnlyCommandQueue = copyOnly;
preemptionCmdSyncProgramming = getPreemptionCmdProgramming();
activeSubDevices = static_cast<uint32_t>(csr->getOsContext().getDeviceBitfield().count());
if (!isInternal) {
partitionCount = csr->getActivePartitions();
}
}
return returnValue;
}

View File

@@ -52,9 +52,6 @@ struct CommandQueueHw : public CommandQueueImp {
bool getPreemptionCmdProgramming() override;
void patchCommands(CommandList &commandList, uint64_t scratchAddress);
size_t getPartitionProgrammingSize();
void programPartitionConfiguration(NEO::LinearStream &stream);
};
} // namespace L0

View File

@@ -149,7 +149,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
bool directSubmissionEnabled = isCopyOnlyCommandQueue ? csr->isBlitterDirectSubmissionEnabled() : csr->isDirectSubmissionEnabled();
partitionCount = csr->getActivePartitions();
bool programActivePartitionConfig = csr->isProgramActivePartitionConfigRequired();
L0::Fence *fence = nullptr;
@@ -215,6 +215,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
linearStreamSizeEstimate += sizeof(MI_BATCH_BUFFER_END);
}
auto csrHw = reinterpret_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(csr);
if (programActivePartitionConfig) {
linearStreamSizeEstimate += csrHw->getCmdSizeForActivePartitionConfig();
}
auto &hwInfo = device->getHwInfo();
if (hFence) {
fence = Fence::fromHandle(hFence);
@@ -269,9 +274,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(hwInfo);
if (partitionCount > 1) {
linearStreamSizeEstimate += getPartitionProgrammingSize();
}
size_t alignedSize = alignUp<size_t>(linearStreamSizeEstimate, minCmdBufferPtrAlign);
size_t padding = alignedSize - linearStreamSizeEstimate;
@@ -282,6 +284,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
if (globalFenceAllocation) {
csr->makeResident(*globalFenceAllocation);
}
const auto workPartitionAllocation = csr->getWorkPartitionAllocation();
if (workPartitionAllocation) {
csr->makeResident(*workPartitionAllocation);
@@ -352,6 +355,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
}
}
if (programActivePartitionConfig) {
csrHw->programActivePartitionConfig(child);
}
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
@@ -419,10 +426,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
commandQueuePreemptionMode = statePreemption;
if (partitionCount > 1) {
programPartitionConfiguration(child);
}
if (hFence) {
csr->makeResident(fence->getAllocation());
if (isCopyOnlyCommandQueue) {

View File

@@ -123,13 +123,4 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
UNRECOVERABLE_IF(!commandsToPatch.empty());
}
// Reports how much command-stream space partition programming needs for this
// command queue variant. executeCommandLists adds the result to its
// linearStreamSizeEstimate when partitionCount > 1.
// This variant always reports 0, matching the empty
// programPartitionConfiguration defined alongside it.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
return 0;
}
// Partition-configuration programming hook for this command queue variant.
// The body is intentionally empty: nothing is written to `stream`, which is
// consistent with getPartitionProgrammingSize() returning 0 in this file.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
}
} // namespace L0

View File

@@ -153,20 +153,4 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
}
}
// Reports the command-stream space needed to program the partition registers
// for this command queue variant. The value is whatever
// ImplicitScalingDispatch reports for its register configuration.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::getPartitionProgrammingSize() {
    using Family = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using ScalingDispatch = NEO::ImplicitScalingDispatch<Family>;

    return ScalingDispatch::getRegisterConfigurationSize();
}
// Writes the partition register configuration into `stream`.
// The work-partition allocation GPU address is taken from this queue's csr,
// and the address offset passed is CommonConstants::partitionAddressOffset.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPartitionConfiguration(NEO::LinearStream &stream) {
    using Family = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
    using ScalingDispatch = NEO::ImplicitScalingDispatch<Family>;

    uint64_t partitionAllocationGpuVa = csr->getWorkPartitionAllocationGpuAddress();
    ScalingDispatch::dispatchRegisterConfiguration(stream,
                                                   partitionAllocationGpuVa,
                                                   CommonConstants::partitionAddressOffset);
}
} // namespace L0

View File

@@ -19,7 +19,6 @@ namespace ult {
using MultiPartitionPrologueTest = Test<DeviceFixture>;
HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@@ -41,27 +40,22 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itorPc = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), itorLrm);
auto lrmCmdPc = genCmdCast<MI_LOAD_REGISTER_MEM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::wparidCCSOffset, lrmCmdPc->getRegisterAddress());
ASSERT_EQ(lrmCmdPc->getMmioRemapEnable(), true);
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
itorPc = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto lriCmdPc = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmdPc->getRegisterOffset()));
ASSERT_EQ(static_cast<uint32_t>(lriCmdPc->getDataDword()), dataPartitionSize);
ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true);
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
EXPECT_EQ(dataPartitionSize, static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(true, lriCmd->getMmioRemapEnable());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue));
auto &commandContainer = commandList->commandContainer;
@@ -81,7 +75,6 @@ HWTEST2_F(MultiPartitionPrologueTest, whenAppendMultiPartitionPrologueIsCalledTh
}
using MultiPartitionEpilogueTest = Test<DeviceFixture>;
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledThenCommandListIsUpdated, IsAtLeastXeHpCore) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
ze_result_t returnValue;
@@ -101,16 +94,16 @@ HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledTh
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
auto itorPc = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorPc);
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
auto lriCmdPc = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorPc);
ASSERT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmdPc->getRegisterOffset()));
ASSERT_EQ(static_cast<uint32_t>(lriCmdPc->getDataDword()), CommonConstants::partitionAddressOffset);
ASSERT_EQ(lriCmdPc->getMmioRemapEnable(), true);
auto lriCmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(NEO::PartitionRegisters<FamilyType>::addressOffsetCCSOffset, static_cast<uint64_t>(lriCmd->getRegisterOffset()));
EXPECT_EQ(CommonConstants::partitionAddressOffset, static_cast<uint32_t>(lriCmd->getDataDword()));
EXPECT_EQ(true, lriCmd->getMmioRemapEnable());
auto result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionPrologueIsCalledThenCommandListIsNotUpdated, IsAtMostGen12lp) {

View File

@@ -674,7 +674,7 @@ HWTEST_F(CommandQueueCommandsSingleTile, givenCommandQueueWhenExecutingCommandLi
commandQueue->destroy();
}
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident) {
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutingCommandListsThenWorkPartitionAllocationIsMadeResident, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(1);
@@ -691,6 +691,7 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
bool expectedGAWasMadeResident = false;
};
MyCsrMock csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
EXPECT_EQ(2u, csr.activePartitions);
csr.initializeTagAllocation();
csr.createWorkPartitionAllocation(*neoDevice);
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
@@ -712,13 +713,14 @@ HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenExecutin
auto status = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(status, ZE_RESULT_SUCCESS);
EXPECT_EQ(2u, csr.activePartitionsConfig);
ASSERT_NE(nullptr, workPartitionAllocation);
EXPECT_TRUE(csr.expectedGAWasMadeResident);
commandQueue->destroy();
}
HWTEST_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated) {
HWTEST2_F(CommandQueueCommandsMultiTile, givenCommandQueueOnMultiTileWhenWalkerPartitionIsDisabledThenWorkPartitionAllocationIsNotCreated, IsAtLeastXeHpCore) {
DebugManagerStateRestore restorer;
DebugManager.flags.EnableWalkerPartition.set(0);
@@ -2109,26 +2111,5 @@ HWTEST2_F(DeviceWithDualStorage, givenCmdListWithAppendedKernelAndUsmTransferAnd
commandQueue->destroy();
}
// Checks that on the platforms selected by CommandQueueSBASupport the
// partition programming hooks are no-ops: the reported size is zero and
// programPartitionConfiguration leaves the queue's command stream untouched.
HWTEST2_F(CommandQueueSynchronizeTest, givenBasePlatformsWhenProgrammingPartitionRegistersThenExpectNoAction, CommandQueueSBASupport) {
    ze_command_queue_desc_t queueDesc = {};
    auto receiver = neoDevice->getDefaultEngine().commandStreamReceiver;
    auto queue = new MockCommandQueueHw<gfxCoreFamily>(device, receiver, &queueDesc);

    ze_result_t initStatus = queue->initialize(false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, initStatus);

    constexpr size_t noBytesExpected = 0;
    EXPECT_EQ(noBytesExpected, queue->getPartitionProgrammingSize());

    // Measure how far the stream advanced across the programming call.
    const size_t streamOffsetBefore = queue->commandStream->getUsed();
    queue->programPartitionConfiguration(*queue->commandStream);
    const size_t streamOffsetAfter = queue->commandStream->getUsed();
    EXPECT_EQ(noBytesExpected, streamOffsetAfter - streamOffsetBefore);

    queue->destroy();
}
} // namespace ult
} // namespace L0

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/utilities/software_tags_manager.h"
@@ -823,11 +824,41 @@ HWTEST_F(CommandQueueExecuteCommandListSWTagsTests, givenEnableSWTagsAndCommandL
EXPECT_TRUE(tagFound);
}
// Test helper: scans a parsed command list for partition register programming
// and checks that its presence matches `expectToFind`.
//
// Two things are looked for:
//  - an MI_LOAD_REGISTER_MEM whose register address is
//    PartitionRegisters<GfxFamily>::wparidCCSOffset
//  - an MI_LOAD_REGISTER_IMM whose register offset is
//    PartitionRegisters<GfxFamily>::addressOffsetCCSOffset
//
// Both must be present when expectToFind is true and both must be absent when
// it is false; each is reported through its own gtest expectation.
template <typename GfxFamily>
void findPartitionRegister(GenCmdList &cmdList, bool expectToFind) {
    using MI_LOAD_REGISTER_MEM = typename GfxFamily::MI_LOAD_REGISTER_MEM;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    bool wparidProgrammed = false;
    auto lrmCommands = findAll<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
    for (size_t idx = 0; idx < lrmCommands.size(); ++idx) {
        auto lrm = reinterpret_cast<MI_LOAD_REGISTER_MEM *>(*lrmCommands[idx]);
        wparidProgrammed |= (NEO::PartitionRegisters<GfxFamily>::wparidCCSOffset == lrm->getRegisterAddress());
    }

    bool addressOffsetProgrammed = false;
    auto lriCommands = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
    for (size_t idx = 0; idx < lriCommands.size(); ++idx) {
        auto lri = reinterpret_cast<MI_LOAD_REGISTER_IMM *>(*lriCommands[idx]);
        addressOffsetProgrammed |= (NEO::PartitionRegisters<GfxFamily>::addressOffsetCCSOffset == lri->getRegisterOffset());
    }

    EXPECT_EQ(expectToFind, wparidProgrammed);
    EXPECT_EQ(expectToFind, addressOffsetProgrammed);
}
HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCountWhenExecutingCmdListThenExpectMmioProgrammingAndCorrectEstimation, IsAtLeastXeHpCore) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;
using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using PARSE = typename FamilyType::PARSE;
ze_command_queue_desc_t desc{};
@@ -846,6 +877,8 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
false,
returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(2u, commandQueue->partitionCount);
ASSERT_NE(nullptr, commandQueue->commandStream);
auto &commandStreamReceiver = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) {
@@ -858,28 +891,34 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
EXPECT_EQ(1u, fence->partitionCount);
ze_fence_handle_t fenceHandle = fence->toHandle();
ASSERT_NE(nullptr, commandQueue->commandStream);
fence->partitionCount = 2;
//1st execute call initialized pipeline
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
auto usedSpaceBefore = commandQueue->commandStream->getUsed();
//1st call then initialize registers
GenCmdList cmdList;
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
findPartitionRegister<FamilyType>(cmdList, true);
usedSpaceBefore = commandQueue->commandStream->getUsed();
result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
auto usedSpaceAfter = commandQueue->commandStream->getUsed();
usedSpaceAfter = commandQueue->commandStream->getUsed();
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
size_t cmdBufferSizeWithoutMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(1u, fence->partitionCount);
auto workPartitionAddress = device->getNEODevice()->getDefaultEngine().commandStreamReceiver->getWorkPartitionAllocationGpuAddress();
EXPECT_EQ(2u, fence->partitionCount);
for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(commandLists[i]);
commandList->partitionCount = 2;
}
cmdList.clear();
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
findPartitionRegister<FamilyType>(cmdList, false);
usedSpaceBefore = commandQueue->commandStream->getUsed();
result = commandQueue->executeCommandLists(numCommandLists, commandLists, fenceHandle, true);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
@@ -888,24 +927,12 @@ HWTEST2_F(MultiDeviceCommandQueueExecuteCommandLists, givenMultiplePartitionCoun
size_t cmdBufferSizeWithtMmioProgramming = usedSpaceAfter - usedSpaceBefore;
EXPECT_EQ(2u, fence->partitionCount);
size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM);
size_t expectedSizeWithMmioProgramming = cmdBufferSizeWithoutMmioProgramming;
EXPECT_GE(expectedSizeWithMmioProgramming, cmdBufferSizeWithtMmioProgramming);
GenCmdList cmdList;
cmdList.clear();
ASSERT_TRUE(PARSE::parseCommandBuffer(cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), usedSpaceAfter));
auto itorLri = find<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLri);
auto itorLrm = find<MI_LOAD_REGISTER_MEM *>(cmdList.begin(), cmdList.end());
ASSERT_NE(cmdList.end(), itorLrm);
auto loadRegisterImm = static_cast<MI_LOAD_REGISTER_IMM *>(*itorLri);
EXPECT_EQ(0x23B4u, loadRegisterImm->getRegisterOffset());
EXPECT_EQ(8u, loadRegisterImm->getDataDword());
auto loadRegisterMem = static_cast<MI_LOAD_REGISTER_MEM *>(*itorLrm);
EXPECT_EQ(0x221Cu, loadRegisterMem->getRegisterAddress());
EXPECT_EQ(workPartitionAddress, loadRegisterMem->getMemoryAddress());
findPartitionRegister<FamilyType>(cmdList, false);
auto pipeControlList = findAll<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());