mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
fix: Apply dispatch all for small TG only on BMG
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
4af92c20e4
commit
c0838e1f76
@ -38,8 +38,6 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
|||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto backupCcsNumber = executionEnvironment.rootDeviceEnvironments[0]->getNonLimitedNumberOfCcs();
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(1);
|
|
||||||
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -51,7 +49,11 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
|||||||
{
|
{
|
||||||
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
if (executionEnvironment.rootDeviceEnvironments[0]->getProductHelper().adjustDispatchAllRequired(*executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo())) {
|
||||||
|
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
|
} else {
|
||||||
|
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -78,14 +80,6 @@ XE2_HPG_CORETEST_F(WalkerDispatchTestsXe2HpGCore, whenEncodeAdditionalWalkerFiel
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(2);
|
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
|
||||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(backupCcsNumber);
|
|
||||||
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
|
@ -39,20 +39,18 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCal
|
|||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto backupCcsNumber = executionEnvironment.rootDeviceEnvironments[0]->getNonLimitedNumberOfCcs();
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(1);
|
|
||||||
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
VariableBackup<uint32_t> sliceCountBackup(&executionEnvironment.rootDeviceEnvironments[0]->getMutableHardwareInfo()->gtSystemInfo.SliceCount, 4);
|
||||||
|
|
||||||
{
|
|
||||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
|
||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
{
|
||||||
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
walkerArgs.kernelExecutionType = KernelExecutionType::defaultType;
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
||||||
|
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||||
|
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
@ -79,14 +77,6 @@ XE3_CORETEST_F(WalkerDispatchTestsXe3Core, whenEncodeAdditionalWalkerFieldsIsCal
|
|||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
walkerCmd.getInterfaceDescriptor().setThreadGroupDispatchSize(FamilyType::INTERFACE_DESCRIPTOR_DATA::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1);
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(2);
|
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
|
||||||
EXPECT_FALSE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
|
||||||
}
|
|
||||||
|
|
||||||
{
|
|
||||||
executionEnvironment.rootDeviceEnvironments[0]->setNonLimitedNumberOfCcs(backupCcsNumber);
|
|
||||||
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
debugManager.flags.ComputeDispatchAllWalkerEnableInComputeWalker.set(1);
|
||||||
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
EncodeDispatchKernel<FamilyType>::encodeComputeDispatchAllWalker(walkerCmd, &walkerCmd.getInterfaceDescriptor(), *executionEnvironment.rootDeviceEnvironments[0], walkerArgs);
|
||||||
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
EXPECT_TRUE(walkerCmd.getComputeDispatchAllWalkerEnable());
|
||||||
|
@ -32,8 +32,7 @@ uint32_t EncodeDispatchKernel<Family>::alignPreferredSlmSize(uint32_t slmSize) {
|
|||||||
template <typename Family>
|
template <typename Family>
|
||||||
template <typename WalkerType, typename InterfaceDescriptorType>
|
template <typename WalkerType, typename InterfaceDescriptorType>
|
||||||
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {
|
void EncodeDispatchKernel<Family>::encodeComputeDispatchAllWalker(WalkerType &walkerCmd, const InterfaceDescriptorType *idd, const RootDeviceEnvironment &rootDeviceEnvironment, const EncodeWalkerArgs &walkerArgs) {
|
||||||
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getNonLimitedNumberOfCcs() == 1u &&
|
bool computeDispatchAllWalkerEnable = walkerArgs.kernelExecutionType == KernelExecutionType::concurrent || (rootDeviceEnvironment.getProductHelper().adjustDispatchAllRequired(*rootDeviceEnvironment.getHardwareInfo()) &&
|
||||||
rootDeviceEnvironment.getHardwareInfo()->gtSystemInfo.SliceCount > 2u &&
|
|
||||||
idd &&
|
idd &&
|
||||||
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
|
idd->getThreadGroupDispatchSize() == InterfaceDescriptorType::THREAD_GROUP_DISPATCH_SIZE_TG_SIZE_1 &&
|
||||||
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
|
walkerCmd.getThreadGroupIdXDimension() * walkerCmd.getThreadGroupIdYDimension() * walkerCmd.getThreadGroupIdZDimension() * idd->getNumberOfThreadsInGpgpuThreadGroup() < walkerArgs.maxFrontEndThreads);
|
||||||
|
@ -347,7 +347,6 @@ void ExecutionEnvironment::setDeviceHierarchyMode(const GfxCoreHelper &gfxCoreHe
|
|||||||
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
|
void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const {
|
||||||
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
|
auto hwInfo = rootDeviceEnvironment->getMutableHardwareInfo();
|
||||||
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
|
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
|
||||||
rootDeviceEnvironment->setNonLimitedNumberOfCcs(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
|
|
||||||
productHelper.adjustNumberOfCcs(*hwInfo);
|
productHelper.adjustNumberOfCcs(*hwInfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -95,8 +95,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
|||||||
const ProductHelper &getProductHelper() const;
|
const ProductHelper &getProductHelper() const;
|
||||||
GraphicsAllocation *getDummyAllocation() const;
|
GraphicsAllocation *getDummyAllocation() const;
|
||||||
void releaseDummyAllocation();
|
void releaseDummyAllocation();
|
||||||
void setNonLimitedNumberOfCcs(uint32_t numberOfCss) { this->nonLimitedNumberOfCcs = numberOfCss; };
|
|
||||||
uint32_t getNonLimitedNumberOfCcs() const { return this->nonLimitedNumberOfCcs; };
|
|
||||||
|
|
||||||
std::unique_ptr<SipKernel> sipKernels[static_cast<uint32_t>(SipKernelType::count)];
|
std::unique_ptr<SipKernel> sipKernels[static_cast<uint32_t>(SipKernelType::count)];
|
||||||
std::unique_ptr<GmmHelper> gmmHelper;
|
std::unique_ptr<GmmHelper> gmmHelper;
|
||||||
@ -126,7 +124,6 @@ struct RootDeviceEnvironment : NonCopyableClass {
|
|||||||
protected:
|
protected:
|
||||||
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
|
using GraphicsAllocationUniquePtrType = std::unique_ptr<GraphicsAllocation, std::function<void(GraphicsAllocation *)>>;
|
||||||
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
|
GraphicsAllocationUniquePtrType dummyAllocation = nullptr;
|
||||||
uint32_t nonLimitedNumberOfCcs = 0u;
|
|
||||||
|
|
||||||
bool limitedNumberOfCcs = false;
|
bool limitedNumberOfCcs = false;
|
||||||
bool isWddmOnLinuxEnable = false;
|
bool isWddmOnLinuxEnable = false;
|
||||||
|
@ -148,6 +148,7 @@ class ProductHelper {
|
|||||||
virtual void adjustScratchSize(size_t &requiredScratchSize) const = 0;
|
virtual void adjustScratchSize(size_t &requiredScratchSize) const = 0;
|
||||||
virtual size_t getSvmCpuAlignment() const = 0;
|
virtual size_t getSvmCpuAlignment() const = 0;
|
||||||
virtual bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const = 0;
|
||||||
|
virtual bool adjustDispatchAllRequired(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool isVmBindPatIndexProgrammingSupported() const = 0;
|
virtual bool isVmBindPatIndexProgrammingSupported() const = 0;
|
||||||
virtual bool isIpSamplingSupported(const HardwareInfo &hwInfo) const = 0;
|
virtual bool isIpSamplingSupported(const HardwareInfo &hwInfo) const = 0;
|
||||||
virtual bool isGrfNumReportedWithScm() const = 0;
|
virtual bool isGrfNumReportedWithScm() const = 0;
|
||||||
|
@ -542,6 +542,11 @@ size_t ProductHelperHw<gfxProduct>::getSvmCpuAlignment() const {
|
|||||||
return MemoryConstants::pageSize2M;
|
return MemoryConstants::pageSize2M;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <PRODUCT_FAMILY gfxProduct>
|
||||||
|
bool ProductHelperHw<gfxProduct>::adjustDispatchAllRequired(const HardwareInfo &hwInfo) const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <PRODUCT_FAMILY gfxProduct>
|
template <PRODUCT_FAMILY gfxProduct>
|
||||||
bool ProductHelperHw<gfxProduct>::isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const {
|
bool ProductHelperHw<gfxProduct>::isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const {
|
||||||
return getFrontEndPropertyComputeDispatchAllWalkerSupport();
|
return getFrontEndPropertyComputeDispatchAllWalkerSupport();
|
||||||
|
@ -91,6 +91,7 @@ class ProductHelperHw : public ProductHelper {
|
|||||||
void adjustScratchSize(size_t &requiredScratchSize) const override;
|
void adjustScratchSize(size_t &requiredScratchSize) const override;
|
||||||
size_t getSvmCpuAlignment() const override;
|
size_t getSvmCpuAlignment() const override;
|
||||||
bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const override;
|
bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const override;
|
||||||
|
bool adjustDispatchAllRequired(const HardwareInfo &hwInfo) const override;
|
||||||
bool isVmBindPatIndexProgrammingSupported() const override;
|
bool isVmBindPatIndexProgrammingSupported() const override;
|
||||||
bool isIpSamplingSupported(const HardwareInfo &hwInfo) const override;
|
bool isIpSamplingSupported(const HardwareInfo &hwInfo) const override;
|
||||||
bool isGrfNumReportedWithScm() const override;
|
bool isGrfNumReportedWithScm() const override;
|
||||||
|
@ -45,6 +45,11 @@ bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(ReleaseHelper *rel
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
bool ProductHelperHw<gfxProduct>::adjustDispatchAllRequired(const HardwareInfo &hwInfo) const {
|
||||||
|
return hwInfo.gtSystemInfo.SliceCount > 2u;
|
||||||
|
}
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
void ProductHelperHw<gfxProduct>::adjustScratchSize(size_t &requiredScratchSize) const {
|
void ProductHelperHw<gfxProduct>::adjustScratchSize(size_t &requiredScratchSize) const {
|
||||||
requiredScratchSize *= 2;
|
requiredScratchSize *= 2;
|
||||||
|
Reference in New Issue
Block a user