Add programming of Dispatch Walk Order in COMPUTE_WALKER for xe_hpg

- update xe_hpg generated commands
- add method isAdjustWalkOrderAvailable

Related-To: NEO-7065
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska
2022-06-14 18:17:04 +00:00
committed by Compute-Runtime-Automation
parent 7f4e25a3f6
commit 615fd4c37a
12 changed files with 98 additions and 28 deletions

View File

@@ -94,7 +94,8 @@ struct EncodeDispatchKernel {
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder);
uint32_t requiredWorkGroupOrder,
const HardwareInfo &hwInfo);
static void programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
@@ -106,7 +107,7 @@ struct EncodeDispatchKernel {
static void setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment);
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder);
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo);
static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);
};

View File

@@ -213,7 +213,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
true,
false,
args.isIndirect,
args.dispatchInterface->getRequiredWorkgroupOrder());
args.dispatchInterface->getRequiredWorkgroupOrder(),
hwInfo);
cmd.setPredicateEnable(args.isPredicate);
@@ -283,7 +284,8 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder) {
uint32_t requiredWorkGroupOrder,
const HardwareInfo &hwInfo) {
if (isIndirect) {
walkerCmd.setIndirectParameterEnable(true);
@@ -505,6 +507,6 @@ template <typename Family>
void EncodeDispatchKernel<Family>::setupPostSyncMocs(WALKER_TYPE &walkerCmd, const RootDeviceEnvironment &rootDeviceEnvironment) {}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder) {}
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
} // namespace NEO

View File

@@ -234,7 +234,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
localIdsGenerationByRuntime,
inlineDataProgramming,
args.isIndirect,
requiredWorkgroupOrder);
requiredWorkgroupOrder,
hwInfo);
using POSTSYNC_DATA = typename Family::POSTSYNC_DATA;
auto &postSync = walkerCmd.getPostSync();
@@ -394,7 +395,8 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
bool localIdsGenerationByRuntime,
bool inlineDataProgrammingRequired,
bool isIndirect,
uint32_t requiredWorkGroupOrder) {
uint32_t requiredWorkGroupOrder,
const HardwareInfo &hwInfo) {
if (isIndirect) {
walkerCmd.setIndirectParameterEnable(true);
@@ -444,7 +446,7 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setGenerateLocalId(1);
walkerCmd.setWalkOrder(requiredWorkGroupOrder);
}
adjustWalkOrder(walkerCmd, requiredWorkGroupOrder);
adjustWalkOrder(walkerCmd, requiredWorkGroupOrder, hwInfo);
if (inlineDataProgrammingRequired == true) {
walkerCmd.setEmitInlineParameter(1);
}
@@ -744,6 +746,6 @@ inline void EncodeStoreMMIO<Family>::appendFlags(MI_STORE_REGISTER_MEM *storeReg
}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder) {}
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
} // namespace NEO

View File

@@ -5207,7 +5207,7 @@ typedef struct tagCOMPUTE_WALKER {
uint32_t ComputeCommandOpcode : BITFIELD_RANGE(24, 26);
uint32_t Pipeline : BITFIELD_RANGE(27, 28);
uint32_t CommandType : BITFIELD_RANGE(29, 31);
// DWORD 1
// DWORD 1 of COMPUTE WALKER = DWORD 0 of COMPUTE_WALKER_BODY
uint32_t Reserved_32 : BITFIELD_RANGE(0, 7);
uint32_t Reserved_40 : BITFIELD_RANGE(8, 31);
// DWORD 2
@@ -5219,7 +5219,9 @@ typedef struct tagCOMPUTE_WALKER {
uint32_t Reserved_96 : BITFIELD_RANGE(0, 5);
uint32_t IndirectDataStartAddress : BITFIELD_RANGE(6, 31);
// DWORD 4
uint32_t Reserved_128 : BITFIELD_RANGE(0, 16);
uint32_t Reserved_128 : BITFIELD_RANGE(0, 4);
uint32_t DispatchWalkOrder : BITFIELD_RANGE(5, 6);
uint32_t Reserved_135 : BITFIELD_RANGE(7, 16);
uint32_t MessageSimd : BITFIELD_RANGE(17, 18);
uint32_t TileLayout : BITFIELD_RANGE(19, 21);
uint32_t WalkOrder : BITFIELD_RANGE(22, 24);
@@ -5298,6 +5300,10 @@ typedef struct tagCOMPUTE_WALKER {
PARTITION_ID_SUPPORTED_MIN = 0x0,
PARTITION_ID_SUPPORTED_MAX = 0xf,
} PARTITION_ID;
// Values accepted by setDispatchWalkOrder() and returned by getDispatchWalkOrder()
// for the DispatchWalkOrder bitfield of COMPUTE_WALKER.
// NOTE(review): the enumerator is spelled LINERAR_WALKER (not LINEAR); callers
// reference it by that exact name, so renaming needs a coordinated change.
typedef enum tagDISPATCH_WALK_ORDER { // patched
LINERAR_WALKER = 0x0,
Y_ORDER_WALKER = 0x1,
} DISPATCH_WALK_ORDER;
inline void init() {
memset(&TheStructure, 0, sizeof(TheStructure));
TheStructure.Common.DwordLength = DWORD_LENGTH_FIXED_SIZE;
@@ -5405,6 +5411,12 @@ typedef struct tagCOMPUTE_WALKER {
inline PARTITION_TYPE getPartitionType() const {
return static_cast<PARTITION_TYPE>(TheStructure.Common.PartitionType);
}
// Stores the given DISPATCH_WALK_ORDER value in the DispatchWalkOrder bitfield
// of the command's common layout.
inline void setDispatchWalkOrder(const DISPATCH_WALK_ORDER value) { // patched
TheStructure.Common.DispatchWalkOrder = value;
}
// Returns the current content of the DispatchWalkOrder bitfield, cast back to
// the DISPATCH_WALK_ORDER enum.
inline DISPATCH_WALK_ORDER getDispatchWalkOrder() const { // patched
return static_cast<DISPATCH_WALK_ORDER>(TheStructure.Common.DispatchWalkOrder);
}
typedef enum tagINDIRECTDATASTARTADDRESS {
INDIRECTDATASTARTADDRESS_BIT_SHIFT = 0x6,
INDIRECTDATASTARTADDRESS_ALIGN_SIZE = 0x40,

View File

@@ -115,6 +115,7 @@ class HwInfoConfig {
virtual bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const = 0;
virtual bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const = 0;
virtual bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const = 0;
// Tells whether the walk order adjustment is available for the given hardware.
// Queried by the specialized EncodeDispatchKernel<Family>::adjustWalkOrder before
// it programs the walker's dispatch walk order.
virtual bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const = 0;
MOCKABLE_VIRTUAL ~HwInfoConfig() = default;
@@ -211,6 +212,7 @@ class HwInfoConfigHw : public HwInfoConfig {
bool isBlitCopyRequiredForLocalMemory(const HardwareInfo &hwInfo, const GraphicsAllocation &allocation) const override;
bool isImplicitScalingSupported(const HardwareInfo &hwInfo) const override;
bool isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const override;
bool isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const override;
protected:
HwInfoConfigHw() = default;

View File

@@ -418,4 +418,7 @@ template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const {
return false;
}
// Generic implementation: reports the walk order adjustment as unavailable,
// regardless of the hardware info passed in.
// NOTE(review): products that support it presumably provide their own
// specialization elsewhere - not visible from this file.
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isAdjustWalkOrderAvailable(const HardwareInfo &hwInfo) const {
    return false;
}
} // namespace NEO

View File

@@ -173,6 +173,18 @@ void EncodeSurfaceState<Family>::appendParamsForImageFromBuffer(R_SURFACE_STATE
}
}
// Programs the DispatchWalkOrder field of the walker command from the required
// work-group order.
//
// Nothing is written when the product's HwInfoConfig reports that the walk order
// adjustment is unavailable, or when the compatible dimension order is neither
// the linear walk nor the Y-order walk.
// NOTE(review): requiredWorkGroupOrder indexes compatibleDimensionOrders without
// a bounds check - assumes callers always pass a valid order; confirm.
template <>
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {
    const auto &productConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily);
    if (!productConfig.isAdjustWalkOrderAvailable(hwInfo)) {
        return;
    }

    // Look the mapping up once; both comparisons below use the same entry.
    const auto &dimensionOrder = HwWalkOrderHelper::compatibleDimensionOrders[requiredWorkGroupOrder];
    if (dimensionOrder == HwWalkOrderHelper::linearWalk) {
        walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::LINERAR_WALKER);
        return;
    }
    if (dimensionOrder == HwWalkOrderHelper::yOrderWalk) {
        walkerCmd.setDispatchWalkOrder(WALKER_TYPE::DISPATCH_WALK_ORDER::Y_ORDER_WALKER);
    }
}
template struct EncodeDispatchKernel<Family>;
template struct EncodeStates<Family>;
template struct EncodeMath<Family>;