fix: align thread group to dss size if kernel uses slm

Related-To: NEO-12133
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2024-10-17 10:58:39 +00:00
committed by Compute-Runtime-Automation
parent b17fabb120
commit 9d6d6e85f1
9 changed files with 59 additions and 39 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -13,26 +13,18 @@
namespace NEO {
// Test double derived from GfxCoreHelperHw<GfxFamily>.
// NOTE(review): two class headers appear back to back below; this looks like the
// old and the new line of a diff rendered without +/- markers - confirm which one
// belongs to the resulting file.
template <typename GfxFamily>
class MockGfxCoreHelperWithFenceAllocation : public GfxCoreHelperHw<GfxFamily> {
class MockGfxCoreHelperHw : public GfxCoreHelperHw<GfxFamily> {
public:
// Always answers true; the hwInfo argument is not inspected.
bool isFenceAllocationRequired(const HardwareInfo &hwInfo) const override {
return true;
}
};
// Test double derived from GfxCoreHelperHw<GfxFamily> whose local-memory query
// is hard-wired to answer true.
template <typename GfxFamily>
class MockGfxCoreHelperWithLocalMemory : public GfxCoreHelperHw<GfxFamily> {
  public:
    // Returns true unconditionally; hwInfo is not inspected.
    bool isLocalMemoryEnabled(const HardwareInfo &hwInfo) const override {
        constexpr bool localMemoryAlwaysEnabled = true;
        return localMemoryAlwaysEnabled;
    }
};
// Test double derived from GfxCoreHelperHw<GfxFamily>. It forwards a configurable
// lockable flag into allocation data and counts calls to
// alignThreadGroupCountToDssSize instead of performing any alignment.
template <typename GfxFamily>
struct MockGfxCoreHelperHwWithSetIsLockable : public GfxCoreHelperHw<GfxFamily> {
    // Copies setIsLockable into allocationData.storageInfo.isLockable;
    // properties and rootDeviceEnvironment are not read.
    void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const RootDeviceEnvironment &rootDeviceEnvironment) const override {
        auto &storageInfo = allocationData.storageInfo;
        storageInfo.isLockable = this->setIsLockable;
    }

    // Only records the invocation; threadCount is left untouched.
    void alignThreadGroupCountToDssSize(uint32_t &threadCount, uint32_t dssCount, uint32_t threadsPerDss, uint32_t threadGroupSize) const override {
        alignThreadGroupCountToDssSizeCalledTimes += 1u;
    }

    // mutable so that the const override above can bump the counter
    mutable uint32_t alignThreadGroupCountToDssSizeCalledTimes{0u};
    // value written into storageInfo.isLockable by setExtraAllocationData
    bool setIsLockable{true};
};
} // namespace NEO

View File

@@ -12,9 +12,11 @@
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/mock_product_helper_hw.h"
#include "shared/test/common/helpers/raii_gfx_core_helper.h"
#include "shared/test/common/helpers/raii_product_helper.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_gfx_core_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test.h"
@@ -107,16 +109,42 @@ HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGro
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
// Checks the value returned by getMaxWorkGroupCount() when usedSlm is non-zero.
// NOTE(review): two test headers and two `auto expected` declarations appear
// below; this looks like old and new diff lines rendered without +/- markers -
// confirm which of each pair belongs to the resulting file.
TEST_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, MatchAny) {
// Install a mock product helper on the root device environment and set its
// isCooperativeEngineSupportedValue flag to false.
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
// Result obtained with usedSlm set to zero.
usedSlm = 0;
auto baseCount = getMaxWorkGroupCount();
// One-dimensional launch: lws[0] is 1, the remaining entries are zeroed.
lws[0] = 1;
lws[1] = 0;
lws[2] = 0;
workDim = 1;
usedSlm = 4 * MemoryConstants::kiloByte;
// Expected value derived from availableSlm divided by usedSlm.
auto expected = std::min(baseCount, availableSlm / usedSlm);
auto expected = availableSlm / usedSlm;
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
// With a non-zero usedSlm, one getMaxWorkGroupCount() call is expected to bump
// the mock's alignThreadGroupCountToDssSize call counter to exactly one.
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeCalled) {
    RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>> gfxCoreHelperBackup(*rootDeviceEnvironment);
    const auto &alignCalls = gfxCoreHelperBackup.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes;

    usedSlm = 4 * MemoryConstants::kiloByte;
    getMaxWorkGroupCount();

    EXPECT_EQ(1u, alignCalls);
}
// With numberOfBarriers set to one, one getMaxWorkGroupCount() call is expected
// to bump the mock's alignThreadGroupCountToDssSize call counter to exactly one.
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenBarriersWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeCalled) {
    RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>> gfxCoreHelperBackup(*rootDeviceEnvironment);
    const auto &alignCalls = gfxCoreHelperBackup.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes;

    numberOfBarriers = 1;
    getMaxWorkGroupCount();

    EXPECT_EQ(1u, alignCalls);
}
// With neither barriers nor SLM in use, getMaxWorkGroupCount() is expected to
// leave the mock's alignThreadGroupCountToDssSize call counter at zero.
HWTEST_F(KernelHelperMaxWorkGroupsTests, givenZeroBarriersAndSlmNotUsedWhenCalculatingMaxWorkGroupsCountThenAlignToDssSizeNotCalled) {
    RAIIGfxCoreHelperFactory<MockGfxCoreHelperHw<FamilyType>> gfxCoreHelperBackup(*rootDeviceEnvironment);
    const auto &alignCalls = gfxCoreHelperBackup.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes;

    usedSlm = 0;
    numberOfBarriers = 0;
    getMaxWorkGroupCount();

    EXPECT_EQ(0u, alignCalls);
}
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
auto &helper = rootDeviceEnvironment->getHelper<NEO::GfxCoreHelper>();