mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Limit space in tile for concurrent kernels on pvc
Related-To: NEO-7658, HSD-16016919338 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2d9f1caa8c
commit
83eb52591d
@@ -63,13 +63,14 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupC
|
||||
bool requiresLimitation = productHelper.isCooperativeEngineSupported(hwInfo) &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute) &&
|
||||
(!isEngineInstanced);
|
||||
auto numberOfpartsInTileForConcurrentKernels = productHelper.getNumberOfPartsInTileForConcurrentKernel();
|
||||
if (requiresLimitation) {
|
||||
|
||||
auto ccsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
||||
UNRECOVERABLE_IF(ccsCount == 0);
|
||||
return maxWorkGroupCount / ccsCount;
|
||||
numberOfpartsInTileForConcurrentKernels = std::max(numberOfpartsInTileForConcurrentKernels, ccsCount);
|
||||
}
|
||||
return maxWorkGroupCount;
|
||||
return maxWorkGroupCount / numberOfpartsInTileForConcurrentKernels;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -197,6 +197,7 @@ class ProductHelper {
|
||||
virtual bool isMultiContextResourceDeferDeletionSupported() const = 0;
|
||||
virtual bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const = 0;
|
||||
virtual bool isCalculationForDisablingEuFusionWithDpasNeeded() const = 0;
|
||||
virtual uint32_t getNumberOfPartsInTileForConcurrentKernel() const = 0;
|
||||
|
||||
virtual ~ProductHelper() = default;
|
||||
|
||||
|
||||
@@ -526,6 +526,10 @@ template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isStatefulAddressingModeSupported() const {
|
||||
return true;
|
||||
}
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
|
||||
return 1u;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isPlatformQuerySupported() const {
|
||||
|
||||
@@ -116,6 +116,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
void adjustNumberOfCcs(HardwareInfo &hwInfo) const override;
|
||||
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;
|
||||
bool isStatefulAddressingModeSupported() const override;
|
||||
uint32_t getNumberOfPartsInTileForConcurrentKernel() const override;
|
||||
bool isPlatformQuerySupported() const override;
|
||||
bool isNonBlockingGpuSubmissionSupported() const override;
|
||||
bool isResolveDependenciesByPipeControlsSupported(const HardwareInfo &hwInfo, bool isOOQ) const override;
|
||||
|
||||
@@ -25,6 +25,7 @@ struct PVC : public XeHpcCoreFamily {
|
||||
static const uint32_t maxSubslicesSupported = 64;
|
||||
static const uint32_t maxDualSubslicesSupported = 64;
|
||||
static const RuntimeCapabilityTable capabilityTable;
|
||||
static constexpr uint32_t numberOfpartsInTileForConcurrentKernels = 8u;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -226,4 +226,8 @@ std::optional<aub_stream::ProductFamily> ProductHelperHw<gfxProduct>::getAubStre
|
||||
return aub_stream::ProductFamily::Pvc;
|
||||
};
|
||||
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
|
||||
return PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1496,4 +1496,55 @@ HWTEST_F(ProductHelperCommonTest, givenHwHelperWhenIsFusedEuDisabledForDpasCalle
|
||||
HWTEST_F(ProductHelperCommonTest, givenProductHelperWhenCallingIsCalculationForDisablingEuFusionWithDpasNeededThenFalseReturned) {
|
||||
auto &gfxCoreHelper = getHelper<ProductHelper>();
|
||||
EXPECT_FALSE(gfxCoreHelper.isCalculationForDisablingEuFusionWithDpasNeeded());
|
||||
}
|
||||
HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
|
||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::EngineType::ENGINE_CCS;
|
||||
hwInfo.featureTable.flags.ftrRcsNode = false;
|
||||
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto &revision : revisions) {
|
||||
auto hwRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
|
||||
if (hwRevId == CommonConstants::invalidStepping) {
|
||||
continue;
|
||||
}
|
||||
hwInfo.platform.usRevId = hwRevId;
|
||||
|
||||
for (auto isEngineInstanced : ::testing::Bool()) {
|
||||
for (auto isRcsEnabled : ::testing::Bool()) {
|
||||
hwInfo.featureTable.flags.ftrRcsNode = isRcsEnabled;
|
||||
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
|
||||
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
|
||||
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute) ||
|
||||
((engineGroupType == EngineGroupType::Compute) && isRcsEnabled);
|
||||
bool applyLimitation = !isEngineInstanced &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute);
|
||||
if (disallowDispatch) {
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else if (applyLimitation) {
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(256u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2;
|
||||
EXPECT_EQ(2u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(512u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else {
|
||||
EXPECT_EQ(4u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1024u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
} else {
|
||||
EXPECT_EQ(4u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1024u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -9,3 +9,4 @@
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfIsBlitSplitEnqueueWARequiredThenReturnFalse, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IGFX_PVC);
|
||||
|
||||
@@ -157,4 +157,54 @@ PVCTEST_F(GfxCoreHelperTestsPvc, givenMemorySynchronizationCommandsWhenAddingSyn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PVCTEST_F(GfxCoreHelperTestsPvc, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
|
||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::EngineType::ENGINE_CCS;
|
||||
hwInfo.featureTable.flags.ftrRcsNode = false;
|
||||
|
||||
auto tilePartsForConcurrentKernels = PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
auto passedMaxWorkGroupCount = 1024;
|
||||
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto &revision : revisions) {
|
||||
auto hwRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
|
||||
if (hwRevId == CommonConstants::invalidStepping) {
|
||||
continue;
|
||||
}
|
||||
hwInfo.platform.usRevId = hwRevId;
|
||||
|
||||
for (auto isEngineInstanced : ::testing::Bool()) {
|
||||
for (auto isRcsEnabled : ::testing::Bool()) {
|
||||
hwInfo.featureTable.flags.ftrRcsNode = isRcsEnabled;
|
||||
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
|
||||
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
|
||||
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute) ||
|
||||
((engineGroupType == EngineGroupType::Compute) && isRcsEnabled);
|
||||
bool applyLimitation = !isEngineInstanced &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute);
|
||||
if (disallowDispatch) {
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else if (applyLimitation) {
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 16;
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else {
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
} else {
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user