Add new function to set max threads for workgroup

Resolves: NEO-4116

Change-Id: I8b9faf582c42edcb6f616a2f4662200d0d5b73d3
Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
Katarzyna Cencelewska 2020-01-17 20:45:48 +01:00 committed by sys_ocldev
parent 4b2b1acbfd
commit 6ffbf55e43
5 changed files with 18 additions and 7 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2017-2019 Intel Corporation
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -45,4 +45,8 @@ uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
}
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
return maxNumEUsPerSubSlice * numThreadsPerEU;
}
} // namespace NEO

View File

@ -62,6 +62,7 @@ class HwHelper {
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
virtual std::string getExtensions() const = 0;
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool requiresAuxResolves() const = 0;
@ -119,6 +120,8 @@ class HwHelperHw : public HwHelper {
size_t getMaxBarrierRegisterPerSlice() const override;
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;

View File

@ -277,4 +277,8 @@ bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo
return false;
}
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}
} // namespace NEO

View File

@ -277,16 +277,16 @@ void Device::initializeCaps() {
deviceInfo.maxNumEUsPerSubSlice = 0;
deviceInfo.maxSliceCount = systemInfo.SliceCount;
deviceInfo.numThreadsPerEU = 0;
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8;
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32u : 8u;
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
? (systemInfo.EUCount / systemInfo.SubSliceCount)
: systemInfo.EuCountPerPoolMin;
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), 1024u);
maxWS = Math::prevPowerOfTwo(maxWS);
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
// calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
deviceInfo.maxNumOfSubGroups = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize / simdSizeUsed);

View File

@ -851,7 +851,7 @@ TEST(DeviceGetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreE
EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize);
}
TEST(DeviceGetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
HWTEST_F(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
HardwareInfo myHwInfo = *platformDevices[0];
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
FeatureTable &mySkuTable = myHwInfo.featureTable;
@ -884,7 +884,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap
EXPECT_EQ(device->getDeviceInfo().maxWorkGroupSize / 32, device->getDeviceInfo().maxNumOfSubGroups);
}
TEST(DeviceGetCaps, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
HardwareInfo myHwInfo = *platformDevices[0];
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;