Add KernelExecutionType argument to encodeAdditionalWalkerFields method

Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com>
This commit is contained in:
Rafal Maziejuk
2021-12-03 11:46:44 +00:00
committed by Compute-Runtime-Automation
parent 16e9aaa1cf
commit d5f3ac37bf
12 changed files with 34 additions and 14 deletions

View File

@@ -109,7 +109,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
numWorkGroups, localWorkSizes, simd, dim,
false, false, 0u);
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType());
*walkerCmdBuf = walkerCmd;
}
} // namespace NEO

View File

@@ -122,7 +122,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
numWorkGroups, localWorkSizes, simd, dim,
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType());
auto devices = queueCsr.getOsContext().getDeviceBitfield();
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());

View File

@@ -82,14 +82,14 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, whenProgramComputeWalkerThenApplyL3WAFo
{
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
EXPECT_FALSE(walkerCmd.getL3PrefetchDisable());
}
{
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
if (hwInfo.platform.eProductFamily == IGFX_DG2) {
EXPECT_TRUE(walkerCmd.getL3PrefetchDisable());
@@ -107,7 +107,7 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramCompute
for (auto forceL3PrefetchForComputeWalker : {false, true}) {
DebugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
EXPECT_EQ(!forceL3PrefetchForComputeWalker, walkerCmd.getL3PrefetchDisable());
}
}

View File

@@ -350,6 +350,7 @@ ToggleHintKernelDisableCompression = -1
EnableImplicitScaling = -1
DecompressInL3ForImage2dFromBuffer = -1
CFEComputeDispatchAllWalkerEnable = -1
ComputeDispatchAllWalkerEnableInComputeWalker = -1
EnableMemoryPrefetch = -1
ForceCsStallForStatePrefetch = -1
ProgramGlobalFenceAsMiMemFenceCommandInCommandStream = -1

View File

@@ -50,7 +50,7 @@ struct EncodeDispatchKernel {
bool isInternal,
bool isCooperative);
static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd);
static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType);
static void appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);

View File

@@ -327,7 +327,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
}
template <typename Family>
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {}
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {}
template <typename Family>
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}

View File

@@ -267,7 +267,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
dispatchInterface->getSlmTotalSize(),
dispatchInterface->getSlmPolicy());
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, isCooperative ? KernelExecutionType::Concurrent : KernelExecutionType::Default);
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *device);
@@ -292,7 +292,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
}
template <typename Family>
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
}
template <typename Family>

View File

@@ -166,7 +166,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceZPassAsyncComputeThreadLimit, -1, "-1: defa
DECLARE_DEBUG_VARIABLE(int32_t, ForcePixelAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE")
DECLARE_DEBUG_VARIABLE(int32_t, DecompressInL3ForImage2dFromBuffer, -1, "-1: default, 0: WA Disabled, 1: WA enabled - Enable DecompressInL3 for image 2d from compressed buffer")
DECLARE_DEBUG_VARIABLE(int32_t, ToggleHintKernelDisableCompression, -1, "-1: default - use kernel as source of hint, 0: provide hint to disable compression, 1: provide hint to enable compression")
DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeDispatchAllWalkerEnable, -1, "Set Compute Dispatch All Walker flag in CFE_STATE on PVC+ platforms, -1: do not set")
DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeDispatchAllWalkerEnable, -1, "Set Compute Dispatch All Walker flag in CFE_STATE on XE_HPC_CORE platforms, -1: do not set")
DECLARE_DEBUG_VARIABLE(int32_t, ComputeDispatchAllWalkerEnableInComputeWalker, -1, "Set Compute Dispatch All Walker flag in COMPUTE_WALKER on PVC+ platforms, -1: do not set")
DECLARE_DEBUG_VARIABLE(int32_t, EnableMemoryPrefetch, -1, "-1: default, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, ForceCsStallForStatePrefetch, -1, "Set CSStall for STATE_PREFETCH cmd, -1:default, 0:disable, 1:enable")
DECLARE_DEBUG_VARIABLE(int32_t, ProgramGlobalFenceAsMiMemFenceCommandInCommandStream, -1, "-1: default, 0: disable, 1: enable, System Memory Fence executed as MI_MEM_FENCE command in command stream")

View File

@@ -5488,7 +5488,8 @@ typedef struct tagCOMPUTE_WALKER {
uint32_t Reserved_96 : BITFIELD_RANGE(0, 5);
uint32_t IndirectDataStartAddress : BITFIELD_RANGE(6, 31);
// DWORD 4
uint32_t Reserved_128 : BITFIELD_RANGE(0, 16);
uint32_t ComputeDispatchAllWalkerEnable : BITFIELD_RANGE(0, 0);
uint32_t Reserved_129 : BITFIELD_RANGE(1, 16);
uint32_t MessageSimd : BITFIELD_RANGE(17, 18);
uint32_t TileLayout : BITFIELD_RANGE(19, 21);
uint32_t WalkOrder : BITFIELD_RANGE(22, 24);
@@ -5686,6 +5687,12 @@ typedef struct tagCOMPUTE_WALKER {
inline uint32_t getIndirectDataStartAddress() const {
return TheStructure.Common.IndirectDataStartAddress << INDIRECTDATASTARTADDRESS_BIT_SHIFT;
}
// Writes `value` into the single-bit ComputeDispatchAllWalkerEnable field
// (bit 0 of DWORD 4 in the COMPUTE_WALKER layout).
inline void setComputeDispatchAllWalkerEnable(const bool value) {
TheStructure.Common.ComputeDispatchAllWalkerEnable = value;
}
// Reads the single-bit ComputeDispatchAllWalkerEnable field
// (bit 0 of DWORD 4 in the COMPUTE_WALKER layout) and returns it as bool.
inline bool getComputeDispatchAllWalkerEnable() const {
return TheStructure.Common.ComputeDispatchAllWalkerEnable;
}
inline void setMessageSimd(const uint32_t value) {
TheStructure.Common.MessageSimd = value;
}

View File

@@ -173,7 +173,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
}
template <>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
auto programGlobalFenceAsPostSyncOperationInComputeWalker = !Family::isXlA0(hwInfo);
if (DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() != -1) {
programGlobalFenceAsPostSyncOperationInComputeWalker = !!DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
@@ -186,6 +186,16 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
if (DebugManager.flags.ForceL3PrefetchForComputeWalker.get() != -1) {
walkerCmd.setL3PrefetchDisable(!DebugManager.flags.ForceL3PrefetchForComputeWalker.get());
}
auto programComputeDispatchAllWalkerEnableInComputeWalker = Family::isXtTemporary(hwInfo);
if (programComputeDispatchAllWalkerEnableInComputeWalker) {
if (kernelExecutionType == KernelExecutionType::Concurrent) {
walkerCmd.setComputeDispatchAllWalkerEnable(true);
}
}
if (DebugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get() != -1) {
walkerCmd.setComputeDispatchAllWalkerEnable(DebugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get());
}
}
template <>

View File

@@ -25,7 +25,8 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
command->setSingleSliceDispatchCcsMode(streamProperties.frontEndState.singleSliceDispatchCcsMode.value == 1);
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value > 0) {
const auto programComputeDispatchAllWalkerEnableInCfeState = !Family::isXtTemporary(hwInfo);
if (programComputeDispatchAllWalkerEnableInCfeState && streamProperties.frontEndState.computeDispatchAllWalkerEnable.value > 0) {
command->setComputeDispatchAllWalkerEnable(true);
command->setSingleSliceDispatchCcsMode(true);
}

View File

@@ -110,7 +110,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
}
template <>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isPrefetchDisablingRequired(hwInfo)) {
walkerCmd.setL3PrefetchDisable(true);
}