mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 14:55:24 +08:00
Limit space in tile for concurrent kernels on pvc
Related-To: NEO-7658, HSD-16016919338 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
2d9f1caa8c
commit
83eb52591d
@@ -17,6 +17,7 @@ set(L0_FIXTURES_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/event_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/kernel_max_cooperative_groups_count_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/memory_ipc_fixture.h
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
class KernelImpSuggestMaxCooperativeGroupCountTests : public Test<DeviceFixture> {
|
||||
public:
|
||||
const uint32_t numGrf = 128;
|
||||
const uint32_t simd = 8;
|
||||
const uint32_t lws[3] = {1, 1, 1};
|
||||
uint32_t usedSlm = 0;
|
||||
uint32_t usesBarriers = 0;
|
||||
|
||||
uint32_t availableThreadCount;
|
||||
uint32_t dssCount;
|
||||
uint32_t availableSlm;
|
||||
uint32_t maxBarrierCount;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo;
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
|
||||
void SetUp() override {
|
||||
Test<DeviceFixture>::SetUp();
|
||||
kernelInfo.kernelDescriptor = &kernelDescriptor;
|
||||
auto &hardwareInfo = device->getHwInfo();
|
||||
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
availableThreadCount = helper.calculateAvailableThreadCount(hardwareInfo, numGrf);
|
||||
|
||||
dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
|
||||
maxBarrierCount = static_cast<uint32_t>(helper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
}
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
kernel.module = module.get();
|
||||
|
||||
kernel.groupSize[0] = lws[0];
|
||||
kernel.groupSize[1] = lws[1];
|
||||
kernel.groupSize[2] = lws[2];
|
||||
uint32_t totalGroupCount = 0;
|
||||
kernel.KernelImp::suggestMaxCooperativeGroupCount(&totalGroupCount, NEO::EngineGroupType::CooperativeCompute, true);
|
||||
return totalGroupCount;
|
||||
}
|
||||
};
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
@@ -15,6 +15,7 @@
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
@@ -351,70 +352,19 @@ TEST(zeKernelGetProperties, WhenGettingKernelPropertiesThenSuccessIsReturned) {
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
}
|
||||
|
||||
class KernelImpSuggestMaxCooperativeGroupCountTests : public KernelImp {
|
||||
public:
|
||||
const uint32_t numGrf = 128;
|
||||
const uint32_t simd = 8;
|
||||
const uint32_t lws[3] = {1, 1, 1};
|
||||
uint32_t usedSlm = 0;
|
||||
uint32_t usesBarriers = 0;
|
||||
|
||||
uint32_t availableThreadCount;
|
||||
uint32_t dssCount;
|
||||
uint32_t availableSlm;
|
||||
uint32_t maxBarrierCount;
|
||||
WhiteBox<::L0::KernelImmutableData> kernelInfo;
|
||||
NEO::KernelDescriptor kernelDescriptor;
|
||||
|
||||
void SetUp() override {
|
||||
KernelImp::SetUp();
|
||||
kernelInfo.kernelDescriptor = &kernelDescriptor;
|
||||
auto &hardwareInfo = device->getHwInfo();
|
||||
auto &helper = device->getNEODevice()->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
availableThreadCount = helper.calculateAvailableThreadCount(hardwareInfo, numGrf);
|
||||
|
||||
dssCount = hardwareInfo.gtSystemInfo.DualSubSliceCount;
|
||||
if (dssCount == 0) {
|
||||
dssCount = hardwareInfo.gtSystemInfo.SubSliceCount;
|
||||
}
|
||||
availableSlm = dssCount * KB * hardwareInfo.capabilityTable.slmSize;
|
||||
maxBarrierCount = static_cast<uint32_t>(helper.getMaxBarrierRegisterPerSlice());
|
||||
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.simdSize = simd;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.numGrfRequired = numGrf;
|
||||
}
|
||||
|
||||
uint32_t getMaxWorkGroupCount() {
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.slmInlineSize = usedSlm;
|
||||
kernelInfo.kernelDescriptor->kernelAttributes.barrierCount = usesBarriers;
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
kernel.kernelImmData = &kernelInfo;
|
||||
auto module = std::make_unique<ModuleImp>(device, nullptr, ModuleType::User);
|
||||
kernel.module = module.get();
|
||||
|
||||
kernel.groupSize[0] = lws[0];
|
||||
kernel.groupSize[1] = lws[1];
|
||||
kernel.groupSize[2] = lws[2];
|
||||
uint32_t totalGroupCount = 0;
|
||||
kernel.KernelImp::suggestMaxCooperativeGroupCount(&totalGroupCount, NEO::EngineGroupType::CooperativeCompute, true);
|
||||
return totalGroupCount;
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
usesBarriers = 1;
|
||||
auto expected = dssCount * (maxBarrierCount / usesBarriers);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
usedSlm = 64 * KB;
|
||||
auto expected = availableSlm / usedSlm;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2022 Intel Corporation
|
||||
# Copyright (C) 2022-2023 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -9,6 +9,7 @@ if(TESTS_PVC)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_device_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_cmdlist_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_kernel_pvc.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/test_sampler_pvc.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_pvc.h"
|
||||
#include "shared/test/common/test_macros/header/per_product_test_definitions.h"
|
||||
#include "shared/test/common/test_macros/hw_test_base.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IGFX_PVC);
|
||||
|
||||
using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = KernelImpSuggestMaxCooperativeGroupCountTests;
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = (availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd)) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
usesBarriers = 1;
|
||||
auto expected = (dssCount * (maxBarrierCount / usesBarriers)) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
usedSlm = 64 * KB;
|
||||
auto expected = (availableSlm / usedSlm) / PVC::numberOfpartsInTileForConcurrentKernels;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
Reference in New Issue
Block a user