mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
fix: Apply dispatch all for small TG only on BMG
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
4af92c20e4
commit
c0838e1f76
@ -38,8 +38,6 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
auto backupCcsNumber = executionEnvironment.rootDeviceEnvironments[0]->getNonLimitedNumberOfCcs();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(1);
|
||||
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
||||
|
||||
{
|
||||
@ -51,7 +49,11 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
||||
{
|
||||
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
if (executionEnvironment.rootDeviceEnvironments[0]->getProductHelper().adjustDispatchAllRequired(*executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo())) {
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
} else {
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
@ -78,14 +80,6 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
||||
}
|
||||
|
||||
{
|
||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(2);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(backupCcsNumber);
|
||||
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
|
@ -39,20 +39,18 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCal
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
auto backupCcsNumber = executionEnvironment.rootDeviceEnvironments[0]->getNonLimitedNumberOfCcs();
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(1);
|
||||
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
||||
|
||||
{
|
||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
@ -79,14 +77,6 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCal
|
||||
}
|
||||
|
||||
{
|
||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(2);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
}
|
||||
|
||||
{
|
||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(backupCcsNumber);
|
||||
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||
|
@ -32,8 +32,7 @@ uint32_t EncodeDispatchKernel<Family>::alignPreferredSlmSize(uint32_t slmSize) {
|
||||
template <typename Family>
|
||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getNonLimitedNumberOfCcs() == 1u &&
|
||||
rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SliceCount > 2u &&
|
||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getProductHelper().adjustDispatchAllRequired(*rootDeviceEnvironment.getHardwareInfo()) &&
|
||||
idd &&
|
||||
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
|
||||
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
|
||||
|
@ -347,7 +347,6 @@ void ExecutionEnvironment::setDeviceHierarchyMode(const GfxCoreHelper &gfxCoreHe
|
||||
// Adjusts the CCS count for a single root device environment.
// Obtains the environment's mutable HardwareInfo and its ProductHelper, then
// hands the HardwareInfo to ProductHelper::adjustNumberOfCcs.
// NOTE(review): the hunk header (-347,7 +347,6) says one of the displayed lines
// was removed by this commit; presumably it is the setNonLimitedNumberOfCcs call
// below, since the +/- markers are not visible here — confirm against the raw diff.
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
|
||||
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
|
||||
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
|
||||
// Records NumberOfCCSEnabled before adjustNumberOfCcs receives the mutable hwInfo.
rootDeviceEnvironment->setNonLimitedNumberOfCcs(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
|
||||
productHelper.adjustNumberOfCcs(*hwInfo);
|
||||
}
|
||||
|
||||
|
@ -95,8 +95,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
const ProductHelper &getProductHelper() const;
|
||||
GraphicsAllocation *getDummyAllocation() const;
|
||||
void releaseDummyAllocation();
|
||||
// Stores the given count in the nonLimitedNumberOfCcs member.
// NOTE(review): the hunk header (-95,8 +95,6) indicates two displayed lines were
// removed by this commit; presumably this setter is one of them — confirm.
void setNonLimitedNumberOfCcs(uint32_t numberOfCss) { this->nonLimitedNumberOfCcs = numberOfCss; };
|
||||
// Returns the value currently held in the nonLimitedNumberOfCcs member.
// NOTE(review): the hunk header (-95,8 +95,6) indicates two displayed lines were
// removed by this commit; presumably this getter is one of them — confirm.
uint32_t getNonLimitedNumberOfCcs() const { return this->nonLimitedNumberOfCcs; };
|
||||
|
||||
std::unique_ptr<SipKernel> sipKernels[static_cast<uint32_t>(SipKernelType::count)];
|
||||
std::unique_ptr<GmmHelper> gmmHelper;
|
||||
@ -126,7 +124,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
||||
protected:
|
||||
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
|
||||
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
|
||||
uint32_t nonLimitedNumberOfCcs = 0u;
|
||||
|
||||
bool limitedNumberOfCcs = false;
|
||||
bool isWddmOnLinuxEnable = false;
|
||||
|
@ -148,6 +148,7 @@ class ProductHelper {
|
||||
virtual void adjustScratchSize(size_t &requiredScratchSize) const = 0;
|
||||
virtual size_t getSvmCpuAlignment() const = 0;
|
||||
virtual bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool adjustDispatchAllRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isVmBindPatIndexProgrammingSupported() const = 0;
|
||||
virtual bool isIpSamplingSupported(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isGrfNumReportedWithScm() const = 0;
|
||||
|
@ -542,6 +542,11 @@ size_t ProductHelperHw<gfxProduct>::getSvmCpuAlignment() const {
|
||||
return MemoryConstants::pageSize2M;
|
||||
}
|
||||
|
||||
// Generic ProductHelperHw implementation of adjustDispatchAllRequired.
// Always returns false; the hwInfo argument is not read by this version.
// A product can override the answer with an explicit specialization.
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::adjustDispatchAllRequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const {
|
||||
return getFrontEndPropertyComputeDispatchAllWalkerSupport();
|
||||
|
@ -91,6 +91,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
void adjustScratchSize(size_t &requiredScratchSize) const override;
|
||||
size_t getSvmCpuAlignment() const override;
|
||||
bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool adjustDispatchAllRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isVmBindPatIndexProgrammingSupported() const override;
|
||||
bool isIpSamplingSupported(const HardwareInfo &hwInfo) const override;
|
||||
bool isGrfNumReportedWithScm() const override;
|
||||
|
@ -45,6 +45,11 @@ bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(ReleaseHelper *rel
|
||||
return true;
|
||||
}
|
||||
|
||||
// Explicit specialization of adjustDispatchAllRequired for this file's gfxProduct.
// Returns true only when hwInfo.gtSystemInfo.SliceCount is greater than 2.
// NOTE(review): the commit title suggests the product is BMG, but the file name
// and the definition of gfxProduct are not visible in this hunk — confirm.
template <>
|
||||
bool ProductHelperHw<gfxProduct>::adjustDispatchAllRequired(const HardwareInfo &hwInfo) const {
|
||||
return hwInfo.gtSystemInfo.SliceCount > 2u;
|
||||
}
|
||||
|
||||
template <>
|
||||
void ProductHelperHw<gfxProduct>::adjustScratchSize(size_t &requiredScratchSize) const {
|
||||
requiredScratchSize *= 2;
|
||||
|
Reference in New Issue
Block a user