mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Revert "performance: Set dispatch all for small TG"
This reverts commit 0dc2870513.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ed08e751fb
commit
680e62d333
@@ -125,8 +125,8 @@ struct EncodeDispatchKernel {
|
||||
template <typename WalkerType>
|
||||
static void encode(CommandContainer &container, EncodeDispatchKernelArgs &args);
|
||||
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs);
|
||||
template <typename WalkerType>
|
||||
static void encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
|
||||
template <typename InterfaceDescriptorType>
|
||||
static void appendAdditionalIDDFields(InterfaceDescriptorType *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment,
|
||||
|
||||
@@ -406,8 +406,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(InterfaceDescriptorType
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
template <typename WalkerType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
template struct NEO::EncodeDispatchKernel<Family>;
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType, Family::INTERFACE_DESCRIPTOR_DATA>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const Family::INTERFACE_DESCRIPTOR_DATA *idd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields<Family::DefaultWalkerType>(const RootDeviceEnvironment &rootDeviceEnvironment, Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void NEO::EncodeDispatchKernel<Family>::adjustTimestampPacket<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForRegularEvent<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setupPostSyncForInOrderExec<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeDispatchKernelArgs &args);
|
||||
|
||||
@@ -408,7 +408,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
args.requiredDispatchWalkOrder,
|
||||
args.additionalSizeParam,
|
||||
args.device->getDeviceInfo().maxFrontEndThreads};
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, &idd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
|
||||
uint32_t workgroupSize = args.dispatchInterface->getGroupSize()[0] * args.dispatchInterface->getGroupSize()[1] * args.dispatchInterface->getGroupSize()[2];
|
||||
bool isRequiredWorkGroupOrder = args.requiredDispatchWalkOrder != NEO::RequiredDispatchWalkOrder::none;
|
||||
|
||||
@@ -359,7 +359,6 @@ void ExecutionEnvironment::setDeviceHierarchy(const GfxCoreHelper &gfxCoreHelper
|
||||
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
|
||||
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
|
||||
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
|
||||
rootDeviceEnvironment->setNonLimitedNumberOfCcs(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
|
||||
productHelper.adjustNumberOfCcs(*hwInfo);
|
||||
}
|
||||
|
||||
|
||||
@@ -88,8 +88,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
void initCompilerProductHelper();
|
||||
void initReleaseHelper();
|
||||
void initAilConfigurationHelper();
|
||||
void setNonLimitedNumberOfCcs(uint32_t numberOfCss) { this->nonLimitedNumberOfCcs = numberOfCss; };
|
||||
uint32_t getNonLimitedNumberOfCcs() const { return this->nonLimitedNumberOfCcs; };
|
||||
ReleaseHelper *getReleaseHelper() const;
|
||||
AILConfiguration *getAILConfigurationHelper() const;
|
||||
template <typename HelperType>
|
||||
@@ -126,7 +124,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
protected:
|
||||
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
|
||||
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
|
||||
uint32_t nonLimitedNumberOfCcs = 0u;
|
||||
|
||||
bool limitedNumberOfCcs = false;
|
||||
bool isWddmOnLinuxEnable = false;
|
||||
|
||||
@@ -230,8 +230,8 @@ void EncodeSurfaceState<Family>::disableCompressionFlags(R_SURFACE_STATE *surfac
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = walkerArgs.requiredSystemFence;
|
||||
int32_t overrideProgramSystemMemoryFence = debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get();
|
||||
if (overrideProgramSystemMemoryFence != -1) {
|
||||
@@ -245,11 +245,7 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDevice
|
||||
walkerCmd.setL3PrefetchDisable(!forceL3PrefetchForComputeWalker);
|
||||
}
|
||||
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getNonLimitedNumberOfCcs() > 1 &&
|
||||
idd &&
|
||||
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
|
||||
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
|
||||
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
|
||||
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||
if (overrideComputeDispatchAllWalkerEnable != -1) {
|
||||
computeDispatchAllWalkerEnable = !!overrideComputeDispatchAllWalkerEnable;
|
||||
|
||||
@@ -161,8 +161,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
auto programGlobalFenceAsPostSyncOperationInComputeWalker = productHelper.isGlobalFenceInCommandStreamRequired(hwInfo) &&
|
||||
|
||||
@@ -116,8 +116,8 @@ void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const EncodeWalkerArgs &walkerArgs) {
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const RootDeviceEnvironment &rootDeviceEnvironment, WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
auto *releaseHelper = rootDeviceEnvironment.getReleaseHelper();
|
||||
bool l3PrefetchDisable = releaseHelper->isPrefetchDisablingRequired();
|
||||
int32_t overrideL3PrefetchDisable = debugManager.flags.ForceL3PrefetchForComputeWalker.get();
|
||||
|
||||
Reference in New Issue
Block a user