Revert "performance: Set dispatch all for small TG"

This reverts commit 0dc2870513.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2024-10-02 05:04:43 +02:00
committed by Compute-Runtime-Automation
parent ed08e751fb
commit 680e62d333
19 changed files with 66 additions and 107 deletions

View File

@@ -125,8 +125,8 @@ struct EncodeDispatchKernel {
template <typename WalkerType>
static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args);
template <typename WalkerType, typename InterfaceDescriptorType>
static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs);
template <typename WalkerType>
static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
template <typename InterfaceDescriptorType>
static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,

View File

@@ -406,8 +406,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType
}
template <typename Family>
template <typename WalkerType, typename InterfaceDescriptorType>
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {}
template <typename WalkerType>
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
template <typename Family>
template <typename InterfaceDescriptorType>

View File

@@ -10,7 +10,7 @@
#include "shared/source/command_container/command_encoder.h"
// Explicit instantiations for the current Family: first the EncodeDispatchKernel class template
// itself, then individual member function templates instantiated for Family::DefaultWalkerType.
template struct NEO::EncodeDispatchKernel<Family>;
// NOTE(review): the next two lines instantiate encodeAdditionalWalkerFields with two different
// signatures (with and without the interface-descriptor pointer). This looks like the old and new
// line of a diff shown together - confirm which one the file actually keeps.
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const Family::INTERFACE_DESCRIPTOR_DATA *idd, const EncodeWalkerArgs &walkerArgs);
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
template void NEO::EncodeDispatchKernel<Family>::adjustTimestampPacket<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);

View File

@@ -408,7 +408,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.requiredDispatchWalkOrder,
args.additionalSizeParam,
args.device->getDeviceInfo().maxFrontEndThreads};
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &idd, walkerArgs);
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;

View File

@@ -359,7 +359,6 @@ void ExecutionEnvironment::setDeviceHierarchy(const GfxCoreHelper &gfxCoreHelper
// Lets the product helper adjust the CCS count held in the given root device environment's
// hardware info.
//
// rootDeviceEnvironment: environment whose mutable HardwareInfo is passed to
//                        ProductHelper::adjustNumberOfCcs; dereferenced without a null check.
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
// Store the currently enabled CCS count on the environment before adjustNumberOfCcs runs
// (presumably so the pre-adjustment value stays available - confirm).
// NOTE(review): the surrounding hunk header shrinks from 7 to 6 lines, so this call looks like
// the line removed by the revert - confirm against the actual file.
rootDeviceEnvironment->setNonLimitedNumberOfCcs(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
productHelper.adjustNumberOfCcs(*hwInfo);
}

View File

@@ -88,8 +88,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
void initCompilerProductHelper();
void initReleaseHelper();
void initAilConfigurationHelper();
// Stores the given count in the nonLimitedNumberOfCcs member.
void setNonLimitedNumberOfCcs(uint32_t numberOfCss) { this->nonLimitedNumberOfCcs = numberOfCss; };
// Returns the current value of the nonLimitedNumberOfCcs member.
uint32_t getNonLimitedNumberOfCcs() const { return this->nonLimitedNumberOfCcs; };
ReleaseHelper *getReleaseHelper() const;
AILConfiguration *getAILConfigurationHelper() const;
template <typename HelperType>
@@ -126,7 +124,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
protected:
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
uint32_t nonLimitedNumberOfCcs = 0u;
bool limitedNumberOfCcs = false;
bool isWddmOnLinuxEnable = false;

View File

@@ -230,8 +230,8 @@ void EncodeSurfaceState<Family>::disableCompressionFlags(R_SURFACE_STATE *surfac
}
template <>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
if (overrideProgramSystemMemoryFence != -1) {
@@ -245,11 +245,7 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
walkerCmd.setL3PrefetchDisable(!forceL3PrefetchForComputeWalker);
}
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getNonLimitedNumberOfCcs() > 1 &&
idd &&
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
if (overrideComputeDispatchAllWalkerEnable != -1) {
computeDispatchAllWalkerEnable = !!overrideComputeDispatchAllWalkerEnable;

View File

@@ -161,8 +161,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
}
template <>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
auto programGlobalFenceAsPostSyncOperationInComputeWalker = productHelper.isGlobalFenceInCommandStreamRequired(hwInfo) &&

View File

@@ -116,8 +116,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
}
template <>
template <typename WalkerType, typename InterfaceDescriptorType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
template <typename WalkerType>
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
bool l3PrefetchDisable = releaseHelper->isPrefetchDisablingRequired();
int32_t overrideL3PrefetchDisable = debugManager.flags.ForceL3PrefetchForComputeWalker.get();