Add pipeline select state tracking

This optimization removes pipeline select programming from the command list
preamble; the required state is instead presented to the command queue, which
performs the necessary state update.
The code is disabled by default and can be enabled with a debug key.

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-09-20 16:46:15 +00:00
committed by Compute-Runtime-Automation
parent 7aecea534f
commit e960802e33
25 changed files with 514 additions and 181 deletions

View File

@ -276,6 +276,10 @@ struct CommandList : _ze_command_list_handle_t {
void makeResidentAndMigrate(bool);
void migrateSharedAllocations();
bool getSystolicModeSupport() const {
return systolicModeSupport;
}
ze_context_handle_t hContext = nullptr;
std::vector<Kernel *> printfKernelContainer;
CommandQueue *cmdQImmediate = nullptr;
@ -318,6 +322,7 @@ struct CommandList : _ze_command_list_handle_t {
bool performMemoryPrefetch = false;
bool multiReturnPointCommandList = false;
bool systolicModeSupport = false;
bool pipelineSelectStateTracking = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);

View File

@ -140,7 +140,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
commandContainer.setReservedSshSize(getReserveSshSize());
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly());
commandContainer.systolicModeSupport = this->systolicModeSupport;
if (!this->pipelineSelectStateTracking) {
// allow systolic support set in container when tracking disabled
// setting systolic support allows dispatching untracked command in legacy mode
commandContainer.systolicModeSupport = this->systolicModeSupport;
}
ze_result_t returnType = parseErrorCode(returnValue);
if (returnType == ZE_RESULT_SUCCESS) {
@ -2323,10 +2327,21 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
containsAnyKernel = true;
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
auto logicalStateHelperBlock = !getLogicalStateHelper();
finalStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
if (this->pipelineSelectStateTracking && finalStreamState.pipelineSelect.isDirty() && logicalStateHelperBlock) {
NEO::PipelineSelectArgs pipelineSelectArgs;
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
NEO::PreambleHelper<GfxFamily>::programPipelineSelect(commandContainer.getCommandStream(),
pipelineSelectArgs,
hwInfo);
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
@ -2348,14 +2363,15 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
}
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
if (finalStreamState.stateComputeMode.isDirty() && logicalStateHelperBlock) {
bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute);
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, {}, false, hwInfo, isRcs, nullptr);
}
NEO::PipelineSelectArgs pipelineSelectArgs;
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
finalStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, hwInfo, isRcs, nullptr);
}
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -28,6 +28,7 @@ namespace L0 {
// Constructs a command list: numIddsPerBlock is forwarded to the command container,
// then the two feature switches reported by L0HwHelper are cached in members.
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
// Cached once at construction; later code checks the member rather than asking the helper again.
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
}
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};

View File

@ -45,6 +45,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
}
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
}
ze_result_t CommandQueueImp::destroy() {

View File

@ -68,6 +68,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool isCopyOnlyCommandQueue = false;
bool internalUsage = false;
bool multiReturnPointCommandList = false;
bool pipelineSelectStateTracking = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@ -44,7 +44,6 @@ struct CommandQueueHw : public CommandQueueImp {
size_t estimateFrontEndCmdSize();
size_t estimateFrontEndCmdSize(bool isFrontEndDirty);
size_t estimatePipelineSelect();
void programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &commandStream);
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
@ -171,7 +170,16 @@ struct CommandQueueHw : public CommandQueueImp {
inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired);
inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);
inline void updatePipelineSelectState(CommandList *commandList);
inline size_t estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
bool &gpgpuEnabled);
inline size_t estimatePipelineSelectCmdSize();
inline void programOneCmdListPipelineSelect(CommandList *commandList,
NEO::LinearStream &commandStream,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
size_t alignedChildStreamPadding{};
};

View File

@ -146,7 +146,18 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->csr->programHardwareContext(child);
this->makeSbaTrackingBufferResidentIfL0DebuggerEnabled(ctx.isDebugEnabled);
this->programPipelineSelectIfGpgpuDisabled(child);
auto &csrStateProperties = csr->getStreamProperties();
if (!this->pipelineSelectStateTracking) {
this->programPipelineSelectIfGpgpuDisabled(child);
} else {
// Setting systolic/pipeline select here for 1st command list is to preserve dispatch order of hw commands
auto commandList = CommandList::fromHandle(phCommandLists[0]);
auto &requiredStreamState = commandList->getRequiredStreamState();
// Provide cmdlist required state as cmdlist final state, so csr state does not transition to final
// By preserving required state in csr - keeping csr state not dirty - it will not dispatch 1st command list pipeline select/systolic in main loop
// Csr state will transition to final of 1st command list in main loop
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, requiredStreamState);
}
this->programCommandQueueDebugCmdsForSourceLevelOrL0DebuggerIfEnabled(ctx.isDebugEnabled, child);
this->programStateBaseAddressWithGsbaIfDirty(ctx, phCommandLists[0], child);
this->programCsrBaseAddressIfPreemptionModeInitial(ctx.isPreemptionModeInitial, child);
@ -157,7 +168,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->programActivePartitionConfig(ctx.isProgramActivePartitionConfigRequired, child);
this->encodeKernelArgsBufferAndMakeItResident();
auto &csrStateProperties = csr->getStreamProperties();
bool shouldProgramVfe = this->csr->getLogicalStateHelper() && ctx.frontEndStateDirty;
this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, child, csrStateProperties);
@ -171,7 +182,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
auto &finalStreamState = commandList->getFinalStreamState();
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
this->updatePipelineSelectState(commandList);
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
@ -388,11 +399,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
return estimatedSize;
}
// Returns the command size that PreambleHelper reports for a pipeline select
// on the hardware described by this queue's device.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelect() {
    const auto &hwInfo = device->getHwInfo();
    const size_t pipelineSelectSize = NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(hwInfo);
    return pipelineSelectSize;
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &cmdStream) {
bool gpgpuEnabled = this->csr->getPreambleSetFlag();
@ -647,17 +653,14 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
uint32_t numCommandLists) {
size_t linearStreamSizeEstimate = 0u;
bool gpgpuEnabled = csr->getPreambleSetFlag();
if (!gpgpuEnabled) {
linearStreamSizeEstimate += estimatePipelineSelect();
}
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
if (frontEndTrackingEnabled()) {
if (this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
auto streamPropertiesCopy = csr->getStreamProperties();
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
for (uint32_t i = 0; i < numCommandLists; i++) {
auto cmdList = CommandList::fromHandle(phCommandLists[i]);
auto &requiredStreamState = cmdList->getRequiredStreamState();
@ -665,6 +668,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
streamPropertiesCopy, requiredStreamState, finalStreamState);
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
}
}
@ -1119,14 +1123,59 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmissionAndCompletionResults(
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::updatePipelineSelectState(CommandList *commandList) {
auto &streamProperties = this->csr->getStreamProperties();
// Estimates the space for a single pipeline select command when pipeline select
// state tracking is disabled: one command if the CSR preamble flag is not yet set,
// otherwise nothing. With tracking enabled this always returns 0.
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelectCmdSize() {
    if (this->pipelineSelectStateTracking) {
        return 0;
    }

    const bool preambleAlreadySet = csr->getPreambleSetFlag();
    const size_t pipelineSelectSize = NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
    return preambleAlreadySet ? 0 : pipelineSelectSize;
}
auto &requiredStreamState = commandList->getRequiredStreamState();
auto &finalStreamState = commandList->getFinalStreamState();
// Estimates the pipeline select space one command list adds while walking a batch of command lists.
// Returns 0 when pipeline select state tracking is disabled.
//   csrStateCopy    - scratch copy of the CSR stream properties; advanced to cmdListRequired and then to cmdListFinal
//   cmdListRequired - stream state the command list requires on entry
//   cmdListFinal    - stream state the command list leaves behind
//   gpgpuEnabled    - in/out flag; set to true once a pipeline select has been accounted for
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal,
bool &gpgpuEnabled) {
if (!this->pipelineSelectStateTracking) {
return 0;
}
// NOTE(review): the next two statements use streamProperties, requiredStreamState and finalStreamState,
// none of which is declared in this function - they look like leftovers from another definition; confirm.
streamProperties.pipelineSelect.setProperties(requiredStreamState.pipelineSelect);
streamProperties.pipelineSelect.setProperties(finalStreamState.pipelineSelect);
size_t singlePipelineSelectSize = NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
size_t estimatedSize = 0;
// Apply the required state first so isDirty() reflects the transition into this command list.
csrStateCopy.pipelineSelect.setProperties(cmdListRequired.pipelineSelect);
if (!gpgpuEnabled || csrStateCopy.pipelineSelect.isDirty()) {
estimatedSize += singlePipelineSelectSize;
gpgpuEnabled = true;
}
// Leave the copy in the command list's final state for the next call with the same copy.
csrStateCopy.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
return estimatedSize;
}
// Programs pipeline select for one command list when pipeline select state tracking is enabled;
// does nothing otherwise.
//   commandList     - supplies the systolic mode support value for the command arguments
//   commandStream   - stream that receives the pipeline select command
//   csrState        - tracked CSR stream properties; moved to cmdListRequired, then to cmdListFinal
//   cmdListRequired - stream state the command list requires on entry
//   cmdListFinal    - stream state the command list leaves behind
// A command is emitted when the CSR preamble flag is not set yet or when applying the
// required state left the tracked pipeline select dirty; the preamble flag is then set.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList *commandList, NEO::LinearStream &commandStream, NEO::StreamProperties &csrState,
                                                                    const NEO::StreamProperties &cmdListRequired, const NEO::StreamProperties &cmdListFinal) {
    if (this->pipelineSelectStateTracking) {
        // Sample the flag before touching the tracked state, as the dirty check comes afterwards.
        const bool preambleAlreadySet = csr->getPreambleSetFlag();

        auto &trackedPipelineSelect = csrState.pipelineSelect;
        trackedPipelineSelect.setProperties(cmdListRequired.pipelineSelect);

        const bool needsProgramming = !preambleAlreadySet || trackedPipelineSelect.isDirty();
        if (needsProgramming) {
            NEO::PipelineSelectArgs pipelineSelectArgs = {
                !!trackedPipelineSelect.systolicMode.value,
                false,
                false,
                commandList->getSystolicModeSupport()};

            NEO::PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, device->getHwInfo());
            csr->setPreambleSetFlag(true);
        }

        // The tracked state ends at what the command list leaves behind.
        trackedPipelineSelect.setProperties(cmdListFinal.pipelineSelect);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -23,4 +23,12 @@ bool L0HwHelper::enableMultiReturnPointCommandList() {
return defaultValue;
}
// Tells whether pipeline select state tracking is enabled.
// The EnablePipelineSelectTracking debug flag wins whenever it is not -1;
// otherwise the built-in default (disabled) is returned.
bool L0HwHelper::enablePipelineSelectStateTracking() {
    const auto debugOverride = NEO::DebugManager.flags.EnablePipelineSelectTracking.get();
    if (debugOverride == -1) {
        constexpr bool defaultValue = false;
        return defaultValue;
    }
    return debugOverride != 0;
}
} // namespace L0

View File

@ -31,6 +31,7 @@ class L0HwHelper {
public:
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
static bool enableMultiReturnPointCommandList();
static bool enablePipelineSelectStateTracking();
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;

View File

@ -11,6 +11,7 @@ set(L0_FIXTURES_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/aub_csr_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.inl
${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h

View File

@ -118,66 +118,9 @@ void MultiReturnCommandListFixture::setUp() {
ModuleMutableCommandListFixture::setUp(REVISION_B);
}
void CmdListPipelineSelectStateFixture::testBody() {
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
auto &cmdListFinalState = commandList->getFinalStreamState();
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
commandList->close();
auto &csrState = commandQueue->csr->getStreamProperties();
auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
commandList->reset();
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
commandList->close();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
// Fixture setup: switches the EnablePipelineSelectTracking debug flag on, then runs the base fixture setup.
void CmdListPipelineSelectStateFixture::setUp() {
// NOTE(review): presumably the flag must be set before the base setUp creates the objects that read it - confirm.
DebugManager.flags.EnablePipelineSelectTracking.set(1);
ModuleMutableCommandListFixture::setUp();
}
} // namespace ult

View File

@ -64,89 +64,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
uint64_t firstStoreRegMemAddress,
uint32_t secondLoadRegisterRegSrcAddress,
uint64_t secondStoreRegMemAddress,
bool workloadPartition) {
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using MI_MATH = typename FamilyType::MI_MATH;
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
constexpr uint32_t mask = 0xfffffffe;
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
} else {
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
}
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
}
itor++;
{
ASSERT_NE(cmdList.end(), itor);
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
if (workloadPartition) {
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
} else {
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
}
}
itor++;
startIt = itor;
}
bool workloadPartition);
struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
void setUp() {
@ -168,7 +86,12 @@ struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture {
};
// Test fixture for pipeline select state scenarios, built on ModuleMutableCommandListFixture.
struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixture {
// Fixture setup hook; declared here, defined out of line.
void setUp();
// Test scenario parameterized by the hardware family type; declared here, defined out of line.
template <typename FamilyType>
void testBody();
// NOTE(review): presumably restores debug flag values when the fixture is destroyed - confirm.
DebugManagerStateRestore restorer;
};
} // namespace ult

View File

@ -0,0 +1,369 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/helpers/unit_test_helper.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
namespace L0 {
namespace ult {
// Validates two consecutive timestamp register sequences in a parsed command list.
//
// Starting at startIt, the function looks for the first MI_LOAD_REGISTER_REG and then expects,
// in order and twice over:
//   MI_LOAD_REGISTER_REG  (source = given register, destination = CS_GPR_R0)
//   MI_LOAD_REGISTER_IMM  (offset = CS_GPR_R1, data = 0xfffffffe)
//   MI_MATH               (DwordLength = 3)
//   MI_STORE_REGISTER_MEM (register = CS_GPR_R2, memory address = given address)
//
//   cmdList                         - parsed command list to inspect
//   startIt                         - in/out: search start; on success moved just past the last validated command
//   firstLoadRegisterRegSrcAddress  - expected source register of the first MI_LOAD_REGISTER_REG
//   firstStoreRegMemAddress         - expected memory address of the first MI_STORE_REGISTER_MEM
//   secondLoadRegisterRegSrcAddress - expected source register of the second MI_LOAD_REGISTER_REG
//   secondStoreRegMemAddress        - expected memory address of the second MI_STORE_REGISTER_MEM
//   workloadPartition               - expected workload partition setting of both MI_STORE_REGISTER_MEM commands
//
// Every command cast is checked for nullptr before it is dereferenced, so a command of an
// unexpected type is reported as a fatal test failure instead of dereferencing a null pointer.
// A fatal failure returns early and leaves startIt untouched.
template <typename FamilyType>
void validateTimestampRegisters(GenCmdList &cmdList,
                                GenCmdList::iterator &startIt,
                                uint32_t firstLoadRegisterRegSrcAddress,
                                uint64_t firstStoreRegMemAddress,
                                uint32_t secondLoadRegisterRegSrcAddress,
                                uint64_t secondStoreRegMemAddress,
                                bool workloadPartition) {
    using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;

    constexpr uint32_t mask = 0xfffffffe;

    // First sequence.
    auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        ASSERT_NE(nullptr, cmdLoadReg);
        EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
        EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
        ASSERT_NE(nullptr, cmdLoadImm);
        EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
        EXPECT_EQ(mask, cmdLoadImm->getDataDword());
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMath = genCmdCast<MI_MATH *>(*itor);
        ASSERT_NE(nullptr, cmdMath);
        EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmdMem);
        EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
        EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
        if (workloadPartition) {
            EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        } else {
            EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        }
    }

    // Second sequence, expected immediately after the first one.
    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
        ASSERT_NE(nullptr, cmdLoadReg);
        EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
        EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
        ASSERT_NE(nullptr, cmdLoadImm);
        EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
        EXPECT_EQ(mask, cmdLoadImm->getDataDword());
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMath = genCmdCast<MI_MATH *>(*itor);
        ASSERT_NE(nullptr, cmdMath);
        EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
    }

    ++itor;
    {
        ASSERT_NE(cmdList.end(), itor);
        auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
        ASSERT_NE(nullptr, cmdMem);
        EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
        EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
        if (workloadPartition) {
            EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        } else {
            EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
        }
    }

    // Hand the position after the validated commands back to the caller.
    ++itor;
    startIt = itor;
}
template <typename FamilyType>
void CmdListPipelineSelectStateFixture::testBody() {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
void *currentBuffer = nullptr;
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
auto &cmdListFinalState = commandList->getFinalStreamState();
auto &csrState = commandQueue->csr->getStreamProperties();
auto commandListHandle = commandList->toHandle();
auto &commandListStream = *commandList->commandContainer.getCommandStream();
auto &cmdQueueStream = commandQueue->commandStream;
GenCmdList cmdList;
std::vector<GenCmdList::iterator> pipelineSelectList;
size_t sizeBefore = 0;
size_t sizeAfter = 0;
auto result = ZE_RESULT_SUCCESS;
{
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(0, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, pipelineSelectList.size());
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
commandList->reset();
}
{
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, pipelineSelectList.size());
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, pipelineSelectList.size());
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
commandList->reset();
}
{
// Scenario: systolic kernel, then non-systolic kernel, then systolic kernel again
// in a single command list, followed by execution on the command queue.

// Step 1: append a kernel that requests systolic pipeline select mode.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
// Both the required and the final tracked state are expected to report systolic mode on.
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
// Parse only the bytes this append added to the command list stream.
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// No PIPELINE_SELECT is expected in the command list for the first kernel.
// NOTE(review): presumably the initial requirement is left for the command queue to program - confirm.
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
// Step 2: append the same kernel with systolic mode switched off.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
// Required state is expected to stay at 1 while the final state is expected to drop to 0.
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// Exactly one PIPELINE_SELECT is expected; ASSERT (fatal) guards the [0] access below.
ASSERT_EQ(1u, pipelineSelectList.size());
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
// The emitted command is expected to carry the systolic flag cleared.
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
// Step 3: append the kernel once more with systolic mode switched back on.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
// Required state still 1; final state is expected to return to 1.
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// Exactly one PIPELINE_SELECT is expected, this time with the systolic flag set.
ASSERT_EQ(1u, pipelineSelectList.size());
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
cmdList.clear();
pipelineSelectList.clear();
// Step 4: close the list and execute it; inspect only what the queue stream gained.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
// After execution the CSR-side state is expected to report systolic mode on.
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// The queue stream is expected to contain exactly one PIPELINE_SELECT with the systolic flag set.
ASSERT_EQ(1u, pipelineSelectList.size());
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
// Clear the parsed containers and reset the command list before the next scenario.
cmdList.clear();
pipelineSelectList.clear();
commandList->reset();
}
{
// Scenario: a single systolic kernel is appended and executed after the previous
// scenario already expected csrState.pipelineSelect.systolicMode to be 1.

// Append a kernel that requests systolic pipeline select mode.
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
// Both the required and the final tracked state are expected to report systolic mode on.
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
// Parse only the bytes this append added to the command list stream.
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// No PIPELINE_SELECT is expected in the command list for the first kernel after reset.
EXPECT_EQ(0u, pipelineSelectList.size());
cmdList.clear();
pipelineSelectList.clear();
// Close the list and execute it; inspect only what the queue stream gained.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
// The CSR-side state is expected to remain at systolic mode on.
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
// No PIPELINE_SELECT is expected in the queue stream this time.
// NOTE(review): presumably because the CSR state already matches the list's required state - confirm.
EXPECT_EQ(0u, pipelineSelectList.size());
}
}
} // namespace ult
} // namespace L0

View File

@ -63,6 +63,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::patternAllocations;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::unifiedMemoryControls;
using BaseClass::updateStreamProperties;
@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
@ -147,6 +149,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::multiReturnPointCommandList;
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;
using BaseClass::pipelineSelectStateTracking;
WhiteBox(Device *device);
~WhiteBox() override;

View File

@ -36,6 +36,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using CommandQueue::internalUsage;
using CommandQueue::multiReturnPointCommandList;
using CommandQueue::partitionCount;
using CommandQueue::pipelineSelectStateTracking;
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
const ze_command_queue_desc_t *desc);
@ -67,6 +68,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::multiReturnPointCommandList;
using L0::CommandQueue::partitionCount;
using L0::CommandQueue::pipelineSelectStateTracking;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueueImp::csr;
using typename BaseClass::CommandListExecutionContext;

View File

@ -12,7 +12,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
namespace L0 {

View File

@ -8,7 +8,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"

View File

@ -11,7 +11,7 @@
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"

View File

@ -12,7 +12,7 @@
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
#include "level_zero/core/source/kernel/kernel_imp.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"

View File

@ -12,7 +12,7 @@
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
@ -538,7 +538,7 @@ using CmdListPipelineSelectStateTest = Test<CmdListPipelineSelectStateFixture>;
using SystolicSupport = IsAnyProducts<IGFX_ALDERLAKE_P, IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;
// Runs the fixture's testBody for the product families matched by SystolicSupport.
HWTEST2_F(CmdListPipelineSelectStateTest,
givenAppendSystolicKernelToCommandListWhenExecutingCommandListThenPipelineSelectStateIsTrackedCorrectly, SystolicSupport) {
// NOTE(review): both the untemplated and the FamilyType-templated call appear here;
// this looks like the old and new line of the change shown together - confirm that
// only testBody<FamilyType>() is intended to remain.
testBody();
testBody<FamilyType>();
}
} // namespace ult

View File

@ -261,6 +261,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(hwInfo, dispatchFlags.useSLM);
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
auto isSystolicPipelineSelectModeChanged = (this->lastSystolicPipelineSelectMode != dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode) && this->systolicModeConfigurable;
auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);

View File

@ -62,7 +62,6 @@ void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &com
if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.systolicPipelineSelectMode || !isPreambleSent) {
auto &hwInfo = peekHwInfo();
if (!isPipelineSelectAlreadyProgrammed()) {
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, hwInfo);
}
this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired;

View File

@ -48,7 +48,6 @@ template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) {
if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.systolicPipelineSelectMode || !isPreambleSent) {
auto &hwInfo = peekHwInfo();
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, hwInfo);
this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired;
this->lastSystolicPipelineSelectMode = pipelineSelectArgs.systolicPipelineSelectMode;

View File

@ -474,6 +474,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceHostPointerImport, -1, "-1: default, 0: dis
DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedStateCommand, -1, "-1: default, 0: disable, 1: enable, Program additional extended version of PIPE CONTROL command before non pipelined state command")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
DECLARE_DEBUG_VARIABLE(int32_t, MultiReturnPointCommandList, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
/* Binary Cache */
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")

View File

@ -308,6 +308,7 @@ ExperimentalSetWalkerPartitionType = -1
UseImmDataWriteModeOnPostSyncOperation = 0
OverridePostSyncMocs = -1
EnableImmediateVmBindExt = -1
EnablePipelineSelectTracking = -1
ForceExecutionTile = -1
DisableCachingForHeaps = 0
OverrideTimestampPacketSize = -1