Add new function to set max threads for workgroup
Resolves: NEO-4116 Change-Id: I8b9faf582c42edcb6f616a2f4662200d0d5b73d3 Signed-off-by: Katarzyna Cencelewska <katarzyna.cencelewska@intel.com>
This commit is contained in:
parent
4b2b1acbfd
commit
6ffbf55e43
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2017-2019 Intel Corporation
|
||||
* Copyright (C) 2017-2020 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -45,4 +45,8 @@ uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) {
|
|||
return hwInfo.gtSystemInfo.EUCount * threadsPerEU;
|
||||
}
|
||||
|
||||
uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount;
|
||||
return maxNumEUsPerSubSlice * numThreadsPerEU;
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -62,6 +62,7 @@ class HwHelper {
|
|||
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual std::string getExtensions() const = 0;
|
||||
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
|
||||
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
virtual uint32_t getMocsIndex(GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool requiresAuxResolves() const = 0;
|
||||
|
@ -119,6 +120,8 @@ class HwHelperHw : public HwHelper {
|
|||
|
||||
size_t getMaxBarrierRegisterPerSlice() const override;
|
||||
|
||||
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
|
||||
|
||||
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
|
||||
|
||||
uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) override;
|
||||
|
|
|
@ -277,4 +277,8 @@ bool HwHelperHw<GfxFamily>::isForceEmuInt32DivRemSPWARequired(const HardwareInfo
|
|||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
|
||||
}
|
||||
} // namespace NEO
|
||||
|
|
|
@ -277,16 +277,16 @@ void Device::initializeCaps() {
|
|||
deviceInfo.maxNumEUsPerSubSlice = 0;
|
||||
deviceInfo.maxSliceCount = systemInfo.SliceCount;
|
||||
deviceInfo.numThreadsPerEU = 0;
|
||||
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32 : 8;
|
||||
auto simdSizeUsed = DebugManager.flags.UseMaxSimdSizeToDeduceMaxWorkgroupSize.get() ? 32u : 8u;
|
||||
|
||||
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / systemInfo.SubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
|
||||
auto maxWS = deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU * simdSizeUsed;
|
||||
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
|
||||
|
||||
maxWS = Math::prevPowerOfTwo(uint32_t(maxWS));
|
||||
deviceInfo.maxWorkGroupSize = std::min(uint32_t(maxWS), 1024u);
|
||||
maxWS = Math::prevPowerOfTwo(maxWS);
|
||||
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
|
||||
|
||||
// calculate a maximum number of subgroups in a workgroup (for the required SIMD size)
|
||||
deviceInfo.maxNumOfSubGroups = static_cast<uint32_t>(deviceInfo.maxWorkGroupSize / simdSizeUsed);
|
||||
|
|
|
@ -851,7 +851,7 @@ TEST(DeviceGetCaps, givenDisabledFtrPooledEuWhenCalculatingMaxEuPerSSThenIgnoreE
|
|||
EXPECT_EQ(expectedMaxWGS, device->getDeviceInfo().maxWorkGroupSize);
|
||||
}
|
||||
|
||||
TEST(DeviceGetCaps, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
|
||||
HWTEST_F(DeviceGetCapsTest, givenEnabledFtrPooledEuWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) {
|
||||
HardwareInfo myHwInfo = *platformDevices[0];
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
FeatureTable &mySkuTable = myHwInfo.featureTable;
|
||||
|
@ -884,7 +884,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap
|
|||
EXPECT_EQ(device->getDeviceInfo().maxWorkGroupSize / 32, device->getDeviceInfo().maxNumOfSubGroups);
|
||||
}
|
||||
|
||||
TEST(DeviceGetCaps, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
|
||||
HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxWorkgroupSizeIsComputedItIsLimitedTo1024) {
|
||||
HardwareInfo myHwInfo = *platformDevices[0];
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
|
||||
|
|
Loading…
Reference in New Issue