mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
performance: Set dispatch all for small TG
Resolves: NEO-11814 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
143bcc74c8
commit
af0bb7964a
@ -243,8 +243,8 @@ struct EncodeDispatchKernel {
|
||||
static void overrideDefaultValues(WalkerType &walkerCmd, InterfaceDescriptorType &interfaceDescriptor);
|
||||
template <typename WalkerType>
|
||||
static void encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template <typename WalkerType>
|
||||
static void encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
static void encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
@ -287,7 +287,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
args.requiresSystemMemoryFence()}; // requiredSystemFence
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, cmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(cmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(cmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::template encodeComputeDispatchAllWalker<WalkerType, INTERFACE_DESCRIPTOR_DATA>(cmd, nullptr, rootDeviceEnvironment, walkerArgs);
|
||||
|
||||
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
|
||||
|
||||
@ -419,8 +419,8 @@ template <typename WalkerType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <typename Family>
|
||||
template <typename WalkerType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
inline void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <typename Family>
|
||||
template <typename InterfaceDescriptorType>
|
||||
|
@ -33,7 +33,7 @@ template void NEO::EncodeDispatchKernel<Family>::setWalkerRegionSettings<Family:
|
||||
uint32_t workgroupSize, uint32_t threadGroupCount, uint32_t maxWgCountPerTile, bool requiredDispatchWalkOrder);
|
||||
template void NEO::EncodeDispatchKernel<Family>::overrideDefaultValues<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType &walkerCmd, Family::DefaultWalkerType::InterfaceDescriptorType &interfaceDescriptor);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs);
|
||||
template void NEO::EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker<Family::DefaultWalkerType, Family::DefaultWalkerType::InterfaceDescriptorType>(Family::DefaultWalkerType &walkerCmd, const Family::DefaultWalkerType::InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs);
|
||||
|
||||
template struct NEO::EncodeStates<Family>;
|
||||
template struct NEO::EncodeMath<Family>;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -30,9 +30,13 @@ uint32_t EncodeDispatchKernel<Family>::alignPreferredSlmSize(uint32_t slmSize) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent;
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getNonLimitedNumberOfCcs() == 1u &&
|
||||
rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SliceCount > 2u &&
|
||||
idd &&
|
||||
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
|
||||
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
|
||||
int32_t overrideComputeDispatchAllWalkerEnable = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||
if (overrideComputeDispatchAllWalkerEnable != -1) {
|
||||
computeDispatchAllWalkerEnable = !!overrideComputeDispatchAllWalkerEnable;
|
||||
|
@ -411,7 +411,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
args.requiresSystemMemoryFence()}; // requiresMemoryFence
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(walkerCmd, &idd, rootDeviceEnvironment, walkerArgs);
|
||||
|
||||
EncodeDispatchKernel<Family>::overrideDefaultValues(walkerCmd, idd);
|
||||
|
||||
|
@ -329,6 +329,7 @@ void ExecutionEnvironment::setDeviceHierarchyMode(const GfxCoreHelper &gfxCoreHe
|
||||
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
|
||||
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
|
||||
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
|
||||
rootDeviceEnvironment->setNonLimitedNumberOfCcs(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
|
||||
productHelper.adjustNumberOfCcs(*hwInfo);
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
* Copyright (C) 2019-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -95,6 +95,8 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
const ProductHelper &getProductHelper() const;
|
||||
GraphicsAllocation *getDummyAllocation() const;
|
||||
void releaseDummyAllocation();
|
||||
void setNonLimitedNumberOfCcs(uint32_t numberOfCss) { this->nonLimitedNumberOfCcs = numberOfCss; };
|
||||
uint32_t getNonLimitedNumberOfCcs() const { return this->nonLimitedNumberOfCcs; };
|
||||
|
||||
std::unique_ptr<SipKernel> sipKernels[static_cast<uint32_t>(SipKernelType::count)];
|
||||
std::unique_ptr<GmmHelper> gmmHelper;
|
||||
@ -124,6 +126,7 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
protected:
|
||||
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
|
||||
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
|
||||
uint32_t nonLimitedNumberOfCcs = 0u;
|
||||
|
||||
bool limitedNumberOfCcs = false;
|
||||
bool isWddmOnLinuxEnable = false;
|
||||
|
@ -148,8 +148,8 @@ void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPT
|
||||
}
|
||||
|
||||
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {
|
||||
int32_t overrideDispatchAllWalkerEnableInComputeWalker = debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.get();
|
||||
if (overrideDispatchAllWalkerEnableInComputeWalker != -1) {
|
||||
walkerCmd.setComputeDispatchAllWalkerEnable(overrideDispatchAllWalkerEnableInComputeWalker);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -62,8 +62,8 @@ template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeWalkerPostSyncFields(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <>
|
||||
template <typename WalkerType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const EncodeWalkerArgs &walkerArgs) {}
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {}
|
||||
|
||||
template <>
|
||||
void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -21,6 +21,7 @@ using WalkerDispatchTestsXeHpcCore = ::testing::Test;
|
||||
|
||||
XE_HPC_CORETEST_F(WalkerDispatchTestsXeHpcCore, givenXeHpcWhenEncodeAdditionalWalkerFieldsIsCalledThenComputeDispatchAllIsCorrectlySet) {
|
||||
DebugManagerStateRestore debugRestorer;
|
||||
MockExecutionEnvironment executionEnvironment;
|
||||
auto walkerCmd = FamilyType::cmdInitGpgpuWalker;
|
||||
|
||||
KernelDescriptor kernelDescriptor;
|
||||
@ -28,13 +29,13 @@ XE_HPC_CORETEST_F(WalkerDispatchTestsXeHpcCore, givenXeHpcWhenEncodeAdditionalWa
|
||||
walkerArgs.requiredSystemFence = true;
|
||||
|
||||
{
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, walkerArgs);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user