mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-20 00:24:58 +08:00
Limit space in tile for concurrent kernels on pvc
Related-To: NEO-7658, HSD-16016919338 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
aa661c1878
commit
829c93ca68
@@ -17,6 +17,7 @@ set(L0_FIXTURES_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_max_cooperative_groups_count_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_ipc_fixture.h
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
class KernelImpSuggestMaxCooperativeGroupCountTests : public Test<DeviceFixture> {
|
||||
public:
|
||||
const uint32_t numGrf = 128;
|
||||
const uint32_t simd = 8;
|
||||
const uint32_t lws[3] = {1, 1, 1};
|
||||
uint32_t usedSlm = 0;
|
||||
uint32_t usesBarriers = 0;
|
||||
|
||||
uint32_t availableThreadCount;
|
||||
uint32_t dssCount;
|
||||
uint32_t availableSlm;
|
||||
uint32_t maxBarrierCount;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo;
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
|
||||
void SetUp() override {
|
||||
Test<DeviceFixture>::SetUp();
|
||||
kernelInfo.kernelDescriptor = &kernelDescriptor;
|
||||
auto &hardwareInfo = device->getHwInfo();
|
||||
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
availableThreadCount = helper.calculateAvailableThreadCount(hardwareInfo, numGrf);
|
||||
|
||||
dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
|
||||
maxBarrierCount = static_cast<uint32_t>(helper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
}
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
kernel.module = module.get();
|
||||
|
||||
kernel.groupSize[0] = lws[0];
|
||||
kernel.groupSize[1] = lws[1];
|
||||
kernel.groupSize[2] = lws[2];
|
||||
uint32_t totalGroupCount = 0;
|
||||
kernel.KernelImp::suggestMaxCooperativeGroupCount(&totalGroupCount, NEO::EngineGroupType::CooperativeCompute, true);
|
||||
return totalGroupCount;
|
||||
}
|
||||
};
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
@@ -351,70 +352,19 @@ TEST(zeKernelGetProperties, WhenGettingKernelPropertiesThenSuccessIsReturned) {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
|
||||
public:
|
||||
const uint32_t numGrf = 128;
|
||||
const uint32_t simd = 8;
|
||||
const uint32_t lws[3] = {1, 1, 1};
|
||||
uint32_t usedSlm = 0;
|
||||
uint32_t usesBarriers = 0;
|
||||
|
||||
uint32_t availableThreadCount;
|
||||
uint32_t dssCount;
|
||||
uint32_t availableSlm;
|
||||
uint32_t maxBarrierCount;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo;
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
|
||||
void SetUp() override {
|
||||
KernelImp::SetUp();
|
||||
kernelInfo.kernelDescriptor = &kernelDescriptor;
|
||||
auto &hardwareInfo = device->getHwInfo();
|
||||
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
availableThreadCount = helper.calculateAvailableThreadCount(hardwareInfo, numGrf);
|
||||
|
||||
dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
|
||||
maxBarrierCount = static_cast<uint32_t>(helper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
}
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
kernel.module = module.get();
|
||||
|
||||
kernel.groupSize[0] = lws[0];
|
||||
kernel.groupSize[1] = lws[1];
|
||||
kernel.groupSize[2] = lws[2];
|
||||
uint32_t totalGroupCount = 0;
|
||||
kernel.KernelImp::suggestMaxCooperativeGroupCount(&totalGroupCount, NEO::EngineGroupType::CooperativeCompute, true);
|
||||
return totalGroupCount;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
usesBarriers = 1;
|
||||
auto expected = dssCount * (maxBarrierCount / usesBarriers);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
usedSlm = 64 * KB;
|
||||
auto expected = availableSlm / usedSlm;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
|
||||
@@ -9,6 +9,7 @@ if(TESTS_PVC)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_device_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_pvc.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_pvc.h"
|
||||
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
|
||||
#include "shared/test/common/test_macros/hw_test_base.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IGFX_PVC);
|
||||
|
||||
using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = KernelImpSuggestMaxCooperativeGroupCountTests;
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = (availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd)) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
usesBarriers = 1;
|
||||
auto expected = (dssCount * (maxBarrierCount / usesBarriers)) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
usedSlm = 64 * KB;
|
||||
auto expected = (availableSlm / usedSlm) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -63,13 +63,14 @@ uint32_t GfxCoreHelperHw<Family>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupC
|
||||
bool requiresLimitation = productHelper.isCooperativeEngineSupported(hwInfo) &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute) &&
|
||||
(!isEngineInstanced);
|
||||
auto numberOfpartsInTileForConcurrentKernels = productHelper.getNumberOfPartsInTileForConcurrentKernel();
|
||||
if (requiresLimitation) {
|
||||
|
||||
auto ccsCount = hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
|
||||
UNRECOVERABLE_IF(ccsCount == 0);
|
||||
return maxWorkGroupCount / ccsCount;
|
||||
numberOfpartsInTileForConcurrentKernels = std::max(numberOfpartsInTileForConcurrentKernels, ccsCount);
|
||||
}
|
||||
return maxWorkGroupCount;
|
||||
return maxWorkGroupCount / numberOfpartsInTileForConcurrentKernels;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -198,6 +198,7 @@ class ProductHelper {
|
||||
virtual bool isMultiContextResourceDeferDeletionSupported() const = 0;
|
||||
virtual bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount) const = 0;
|
||||
virtual bool isCalculationForDisablingEuFusionWithDpasNeeded() const = 0;
|
||||
virtual uint32_t getNumberOfPartsInTileForConcurrentKernel() const = 0;
|
||||
|
||||
virtual ~ProductHelper() = default;
|
||||
|
||||
|
||||
@@ -526,6 +526,10 @@ template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isStatefulAddressingModeSupported() const {
|
||||
return true;
|
||||
}
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
|
||||
return 1u;
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
bool ProductHelperHw<gfxProduct>::isPlatformQuerySupported() const {
|
||||
|
||||
@@ -117,6 +117,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
void adjustNumberOfCcs(HardwareInfo &hwInfo) const override;
|
||||
bool isPrefetcherDisablingInDirectSubmissionRequired() const override;
|
||||
bool isStatefulAddressingModeSupported() const override;
|
||||
uint32_t getNumberOfPartsInTileForConcurrentKernel() const override;
|
||||
bool isPlatformQuerySupported() const override;
|
||||
bool isNonBlockingGpuSubmissionSupported() const override;
|
||||
bool isResolveDependenciesByPipeControlsSupported(const HardwareInfo &hwInfo, bool isOOQ) const override;
|
||||
|
||||
@@ -25,6 +25,7 @@ struct PVC : public XeHpcCoreFamily {
|
||||
static const uint32_t maxSubslicesSupported = 64;
|
||||
static const uint32_t maxDualSubslicesSupported = 64;
|
||||
static const RuntimeCapabilityTable capabilityTable;
|
||||
static constexpr uint32_t numberOfpartsInTileForConcurrentKernels = 8u;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -226,4 +226,8 @@ std::optional<aub_stream::ProductFamily> ProductHelperHw<gfxProduct>::getAubStre
|
||||
return aub_stream::ProductFamily::Pvc;
|
||||
};
|
||||
|
||||
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel() const {
|
||||
return PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1474,4 +1474,55 @@ HWTEST_F(ProductHelperCommonTest, givenHwHelperWhenIsFusedEuDisabledForDpasCalle
|
||||
HWTEST_F(ProductHelperCommonTest, givenProductHelperWhenCallingIsCalculationForDisablingEuFusionWithDpasNeededThenFalseReturned) {
|
||||
auto &gfxCoreHelper = getHelper<ProductHelper>();
|
||||
EXPECT_FALSE(gfxCoreHelper.isCalculationForDisablingEuFusionWithDpasNeeded());
|
||||
}
|
||||
HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
|
||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::EngineType::ENGINE_CCS;
|
||||
hwInfo.featureTable.flags.ftrRcsNode = false;
|
||||
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto &revision : revisions) {
|
||||
auto hwRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
|
||||
if (hwRevId == CommonConstants::invalidStepping) {
|
||||
continue;
|
||||
}
|
||||
hwInfo.platform.usRevId = hwRevId;
|
||||
|
||||
for (auto isEngineInstanced : ::testing::Bool()) {
|
||||
for (auto isRcsEnabled : ::testing::Bool()) {
|
||||
hwInfo.featureTable.flags.ftrRcsNode = isRcsEnabled;
|
||||
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
|
||||
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
|
||||
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute) ||
|
||||
((engineGroupType == EngineGroupType::Compute) && isRcsEnabled);
|
||||
bool applyLimitation = !isEngineInstanced &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute);
|
||||
if (disallowDispatch) {
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else if (applyLimitation) {
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(256u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2;
|
||||
EXPECT_EQ(2u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(512u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else {
|
||||
EXPECT_EQ(4u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1024u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
} else {
|
||||
EXPECT_EQ(4u, gfxCoreHelper.adjustMaxWorkGroupCount(4u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
EXPECT_EQ(1024u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,3 +9,4 @@
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfIsBlitSplitEnqueueWARequiredThenReturnFalse, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IGFX_PVC);
|
||||
|
||||
@@ -157,4 +157,54 @@ PVCTEST_F(GfxCoreHelperTestsPvc, givenMemorySynchronizationCommandsWhenAddingSyn
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PVCTEST_F(GfxCoreHelperTestsPvc, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
|
||||
hwInfo.capabilityTable.defaultEngineType = aub_stream::EngineType::ENGINE_CCS;
|
||||
hwInfo.featureTable.flags.ftrRcsNode = false;
|
||||
|
||||
auto tilePartsForConcurrentKernels = PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
auto passedMaxWorkGroupCount = 1024;
|
||||
|
||||
uint32_t revisions[] = {REVISION_A0, REVISION_B};
|
||||
for (auto &revision : revisions) {
|
||||
auto hwRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
|
||||
if (hwRevId == CommonConstants::invalidStepping) {
|
||||
continue;
|
||||
}
|
||||
hwInfo.platform.usRevId = hwRevId;
|
||||
|
||||
for (auto isEngineInstanced : ::testing::Bool()) {
|
||||
for (auto isRcsEnabled : ::testing::Bool()) {
|
||||
hwInfo.featureTable.flags.ftrRcsNode = isRcsEnabled;
|
||||
for (auto engineGroupType : {EngineGroupType::RenderCompute, EngineGroupType::Compute, EngineGroupType::CooperativeCompute}) {
|
||||
if (productHelper.isCooperativeEngineSupported(hwInfo)) {
|
||||
bool disallowDispatch = (engineGroupType == EngineGroupType::RenderCompute) ||
|
||||
((engineGroupType == EngineGroupType::Compute) && isRcsEnabled);
|
||||
bool applyLimitation = !isEngineInstanced &&
|
||||
(engineGroupType != EngineGroupType::CooperativeCompute);
|
||||
if (disallowDispatch) {
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else if (applyLimitation) {
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 4;
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 16;
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
} else {
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
} else {
|
||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace NEO
|
||||
|
||||
Reference in New Issue
Block a user