mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-10 23:56:36 +08:00
fix: align thread group to dss size if kernel uses slm
Related-To: NEO-12133 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b17fabb120
commit
9d6d6e85f1
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -13,26 +13,18 @@
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
class MockGfxCoreHelperWithFenceAllocation : public GfxCoreHelperHw<GfxFamily> {
|
||||
class MockGfxCoreHelperHw : public GfxCoreHelperHw<GfxFamily> {
|
||||
public:
|
||||
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Test mock derived from GfxCoreHelperHw<GfxFamily>: its only override,
// isLocalMemoryEnabled(), returns true unconditionally and ignores hwInfo.
template <typename GfxFamily>
|
||||
class MockGfxCoreHelperWithLocalMemory : public GfxCoreHelperHw<GfxFamily> {
|
||||
public:
|
||||
bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Test mock derived from GfxCoreHelperHw<GfxFamily>.
//  - setExtraAllocationData() writes the setIsLockable member into
//    allocationData.storageInfo.isLockable and touches nothing else.
//  - alignThreadGroupCountToDssSize() only counts its invocations; threadCount
//    is left unmodified.
// NOTE(review): this text is a rendered diff; the members shown here may mix
// removed and added lines (the tests below read the call counter through
// MockGfxCoreHelperHw) — confirm against the actual header.
template <typename GfxFamily>
|
||||
struct MockGfxCoreHelperHwWithSetIsLockable : public GfxCoreHelperHw<GfxFamily> {
|
||||
void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const override {
|
||||
allocationData.storageInfo.isLockable = setIsLockable;
|
||||
}
|
||||
void alignThreadGroupCountToDssSize(uint32_t &threadCount, uint32_t dssCount, uint32_t threadsPerDss, uint32_t threadGroupSize) const override {
|
||||
alignThreadGroupCountToDssSizeCalledTimes++;
|
||||
}
|
||||
// mutable so the const override above can increment it.
mutable uint32_t alignThreadGroupCountToDssSizeCalledTimes = 0;
|
||||
bool setIsLockable = true;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -12,9 +12,11 @@
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/mock_product_helper_hw.h"
|
||||
#include "shared/test/common/helpers/raii_gfx_core_helper.h"
|
||||
#include "shared/test/common/helpers/raii_product_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_gfx_core_helper.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test.h"
|
||||
|
||||
@@ -107,16 +109,42 @@ HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGro
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, MatchAny) {
|
||||
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
|
||||
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
|
||||
usedSlm = 0;
|
||||
auto baseCount = getMaxWorkGroupCount();
|
||||
lws[0] = 1;
|
||||
lws[1] = 0;
|
||||
lws[2] = 0;
|
||||
workDim = 1;
|
||||
|
||||
usedSlm = 4 * MemoryConstants::kiloByte;
|
||||
|
||||
auto expected = std::min(baseCount, availableSlm / usedSlm);
|
||||
auto expected = availableSlm / usedSlm;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
// With usedSlm set to 4 * MemoryConstants::kiloByte, a single
// getMaxWorkGroupCount() call is expected to invoke the mock's
// alignThreadGroupCountToDssSize() exactly once.
// NOTE(review): raiiFactory presumably installs the mock helper on
// rootDeviceEnvironment for its lifetime — confirm in raii_gfx_core_helper.h.
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeCalled) {
|
||||
auto raiiFactory = RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>>(*rootDeviceEnvironment);
|
||||
usedSlm = 4 * MemoryConstants::kiloByte;
|
||||
getMaxWorkGroupCount();
|
||||
EXPECT_EQ(raiiFactory.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes, 1u);
|
||||
}
|
||||
// With numberOfBarriers set to 1, a single getMaxWorkGroupCount() call is
// expected to invoke the mock's alignThreadGroupCountToDssSize() exactly once.
// NOTE(review): usedSlm is not assigned here, so the fixture's default applies
// — confirm that default in the fixture definition.
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenBarriersWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeCalled) {
|
||||
auto raiiFactory = RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>>(*rootDeviceEnvironment);
|
||||
numberOfBarriers = 1;
|
||||
getMaxWorkGroupCount();
|
||||
EXPECT_EQ(raiiFactory.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes, 1u);
|
||||
}
|
||||
|
||||
// Negative case: with numberOfBarriers and usedSlm both explicitly zeroed,
// getMaxWorkGroupCount() is expected not to call the mock's
// alignThreadGroupCountToDssSize() at all (counter stays at 0).
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenZeroBarriersAndSlmNotUsedWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeNotCalled) {
|
||||
auto raiiFactory = RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>>(*rootDeviceEnvironment);
|
||||
numberOfBarriers = 0;
|
||||
usedSlm = 0;
|
||||
getMaxWorkGroupCount();
|
||||
EXPECT_EQ(raiiFactory.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes, 0u);
|
||||
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
|
||||
auto &helper = rootDeviceEnvironment->getHelper<NEO::GfxCoreHelper>();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user