mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 05:52:19 +08:00
Add KernelExecutionType argument to encodeAdditionalWalkerFields method
Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
16e9aaa1cf
commit
d5f3ac37bf
@@ -109,7 +109,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
false, false, 0u);
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType());
|
||||
*walkerCmdBuf = walkerCmd;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -122,7 +122,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
numWorkGroups, localWorkSizes, simd, dim,
|
||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd);
|
||||
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType());
|
||||
|
||||
auto devices = queueCsr.getOsContext().getDeviceBitfield();
|
||||
auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred());
|
||||
|
||||
@@ -82,14 +82,14 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, whenProgramComputeWalkerThenApplyL3WAFo
|
||||
|
||||
{
|
||||
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
|
||||
|
||||
EXPECT_FALSE(walkerCmd.getL3PrefetchDisable());
|
||||
}
|
||||
|
||||
{
|
||||
hwInfo.platform.usRevId = hwInfoConfig.getHwRevIdFromStepping(REVISION_A0, hwInfo);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
|
||||
|
||||
if (hwInfo.platform.eProductFamily == IGFX_DG2) {
|
||||
EXPECT_TRUE(walkerCmd.getL3PrefetchDisable());
|
||||
@@ -107,7 +107,7 @@ HWTEST2_F(WalkerDispatchTestDg2AndLater, givenDebugVariableSetWhenProgramCompute
|
||||
|
||||
for (auto forceL3PrefetchForComputeWalker : {false, true}) {
|
||||
DebugManager.flags.ForceL3PrefetchForComputeWalker.set(forceL3PrefetchForComputeWalker);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
|
||||
EncodeDispatchKernel<FamilyType>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, KernelExecutionType::Default);
|
||||
EXPECT_EQ(!forceL3PrefetchForComputeWalker, walkerCmd.getL3PrefetchDisable());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -350,6 +350,7 @@ ToggleHintKernelDisableCompression = -1
|
||||
EnableImplicitScaling = -1
|
||||
DecompressInL3ForImage2dFromBuffer = -1
|
||||
CFEComputeDispatchAllWalkerEnable = -1
|
||||
ComputeDispatchAllWalkerEnableInComputeWalker = -1
|
||||
EnableMemoryPrefetch = -1
|
||||
ForceCsStallForStatePrefetch = -1
|
||||
ProgramGlobalFenceAsMiMemFenceCommandInCommandStream = -1
|
||||
|
||||
@@ -50,7 +50,7 @@ struct EncodeDispatchKernel {
|
||||
bool isInternal,
|
||||
bool isCooperative);
|
||||
|
||||
static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd);
|
||||
static void encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType);
|
||||
|
||||
static void appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy);
|
||||
|
||||
|
||||
@@ -327,7 +327,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
// Generic (non-specialized) template definition with an empty body: none of
// the arguments are read and walkerCmd is left unmodified. The two signature
// lines below are the pre-change and post-change versions shown by the diff;
// the later one adds the KernelExecutionType parameter.
template <typename Family>
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {}
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {}
|
||||
|
||||
// Generic template definition with an empty body: the interface descriptor
// pointed to by pInterfaceDescriptor is not touched and the remaining
// arguments are ignored.
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
|
||||
|
||||
@@ -267,7 +267,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
dispatchInterface->getSlmTotalSize(),
|
||||
dispatchInterface->getSlmPolicy());
|
||||
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(hwInfo, walkerCmd);
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, isCooperative ? KernelExecutionType::Concurrent : KernelExecutionType::Default);
|
||||
|
||||
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *device);
|
||||
|
||||
@@ -292,7 +292,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -166,7 +166,8 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceZPassAsyncComputeThreadLimit, -1, "-1: defa
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForcePixelAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DecompressInL3ForImage2dFromBuffer, -1, "-1: default, 0: WA Disabled, 1: WA enabled - Enable DecompressInL3 for image 2d from compressed buffer")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ToggleHintKernelDisableCompression, -1, "-1: default - use kernel as source of hint, 0: provide hint to disable compression, 1: provide hint to enable compression")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeDispatchAllWalkerEnable, -1, "Set Compute Dispatch All Walker flag in CFE_STATE on PVC+ platforms, -1: do not set")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeDispatchAllWalkerEnable, -1, "Set Compute Dispatch All Walker flag in CFE_STATE on XE_HPC_CORE platforms, -1: do not set")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ComputeDispatchAllWalkerEnableInComputeWalker, -1, "Set Compute Dispatch All Walker flag in COMPUTE_WALKER on PVC+ platforms, -1: do not set")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableMemoryPrefetch, -1, "-1: default, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCsStallForStatePrefetch, -1, "Set CSStall for STATE_PREFETCH cmd, -1:default, 0:disable, 1:enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ProgramGlobalFenceAsMiMemFenceCommandInCommandStream, -1, "-1: default, 0: disable, 1: enable, System Memory Fence executed as MI_MEM_FENCE command in command stream")
|
||||
|
||||
@@ -5488,7 +5488,8 @@ typedef struct tagCOMPUTE_WALKER {
|
||||
uint32_t Reserved_96 : BITFIELD_RANGE(0, 5);
|
||||
uint32_t IndirectDataStartAddress : BITFIELD_RANGE(6, 31);
|
||||
// DWORD 4
|
||||
uint32_t Reserved_128 : BITFIELD_RANGE(0, 16);
|
||||
uint32_t ComputeDispatchAllWalkerEnable : BITFIELD_RANGE(0, 0);
|
||||
uint32_t Reserved_129 : BITFIELD_RANGE(1, 16);
|
||||
uint32_t MessageSimd : BITFIELD_RANGE(17, 18);
|
||||
uint32_t TileLayout : BITFIELD_RANGE(19, 21);
|
||||
uint32_t WalkOrder : BITFIELD_RANGE(22, 24);
|
||||
@@ -5686,6 +5687,12 @@ typedef struct tagCOMPUTE_WALKER {
|
||||
// Returns the stored IndirectDataStartAddress bitfield shifted left by
// INDIRECTDATASTARTADDRESS_BIT_SHIFT.
// NOTE(review): presumably this reverses a shift applied by the matching
// setter, which is not visible here - confirm.
inline uint32_t getIndirectDataStartAddress() const {
|
||||
return TheStructure.Common.IndirectDataStartAddress << INDIRECTDATASTARTADDRESS_BIT_SHIFT;
|
||||
}
|
||||
// Writes `value` into the ComputeDispatchAllWalkerEnable bitfield of this
// command. The field is declared as a single bit (bit 0 of DWORD 4) in the
// structure layout.
inline void setComputeDispatchAllWalkerEnable(const bool value) {
|
||||
TheStructure.Common.ComputeDispatchAllWalkerEnable = value;
|
||||
}
|
||||
// Returns the current ComputeDispatchAllWalkerEnable bitfield as a bool
// (true when the bit is set).
inline bool getComputeDispatchAllWalkerEnable() const {
|
||||
return TheStructure.Common.ComputeDispatchAllWalkerEnable;
|
||||
}
|
||||
// Stores `value` in the MessageSimd bitfield of this command.
// NOTE(review): the field is declared as bits 17..18 of DWORD 4 (two bits)
// and no range check is performed here, so callers are presumably expected
// to pass a value that fits - confirm.
inline void setMessageSimd(const uint32_t value) {
|
||||
TheStructure.Common.MessageSimd = value;
|
||||
}
|
||||
|
||||
@@ -173,7 +173,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = !Family::isXlA0(hwInfo);
|
||||
if (DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() != -1) {
|
||||
programGlobalFenceAsPostSyncOperationInComputeWalker = !!DebugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
@@ -186,6 +186,16 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
|
||||
if (DebugManager.flags.ForceL3PrefetchForComputeWalker.get() != -1) {
|
||||
walkerCmd.setL3PrefetchDisable(!DebugManager.flags.ForceL3PrefetchForComputeWalker.get());
|
||||
}
|
||||
|
||||
auto programComputeDispatchAllWalkerEnableInComputeWalker = Family::isXtTemporary(hwInfo);
|
||||
if (programComputeDispatchAllWalkerEnableInComputeWalker) {
|
||||
if (kernelExecutionType == KernelExecutionType::Concurrent) {
|
||||
walkerCmd.setComputeDispatchAllWalkerEnable(true);
|
||||
}
|
||||
}
|
||||
if (DebugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get() != -1) {
|
||||
walkerCmd.setComputeDispatchAllWalkerEnable(DebugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get());
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -25,7 +25,8 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
|
||||
command->setSingleSliceDispatchCcsMode(streamProperties.frontEndState.singleSliceDispatchCcsMode.value == 1);
|
||||
|
||||
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->getSteppingFromHwRevId(hwInfo) >= REVISION_B) {
|
||||
if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value > 0) {
|
||||
const auto programComputeDispatchAllWalkerEnableInCfeState = !Family::isXtTemporary(hwInfo);
|
||||
if (programComputeDispatchAllWalkerEnableInCfeState && streamProperties.frontEndState.computeDispatchAllWalkerEnable.value > 0) {
|
||||
command->setComputeDispatchAllWalkerEnable(true);
|
||||
command->setSingleSliceDispatchCcsMode(true);
|
||||
}
|
||||
|
||||
@@ -110,7 +110,7 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd) {
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareInfo &hwInfo, WALKER_TYPE &walkerCmd, KernelExecutionType kernelExecutionType) {
|
||||
if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isPrefetchDisablingRequired(hwInfo)) {
|
||||
walkerCmd.setL3PrefetchDisable(true);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user