Revert "Use Eu per dss to calculate max work group size"

This reverts commit 86b12dd23979db12e1898013c9162cb7106e40f1.

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2021-07-02 16:30:11 +00:00 committed by Compute-Runtime-Automation
parent 55374d419f
commit aed3fada28
8 changed files with 8 additions and 35 deletions

View File

@ -77,8 +77,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c
program->retainForKernel();
imageTransformer.reset(new ImageTransformer);
if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) {
auto deviceInfo = getDevice().getDevice().getDeviceInfo();
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(getHardwareInfo(), static_cast<uint32_t>(getDevice().getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
} else {
maxKernelWorkGroupSize = static_cast<uint32_t>(clDevice.getSharedDeviceInfo().maxWorkGroupSize);
}

View File

@ -1685,20 +1685,3 @@ HWTEST_F(QueueFamilyNameTest, givenBcsWhenGettingQueueFamilyNameThenReturnProper
// Passes EngineGroupType::MaxEngineGroups together with an empty string to the
// fixture's verify() helper.
// NOTE(review): verify() is defined outside this view; going by the test name it
// presumably checks that the queue family name returned for this (invalid) engine
// group equals the given string - confirm against the fixture.
HWTEST_F(QueueFamilyNameTest, givenInvalidEngineGroupWhenGettingQueueFamilyNameThenReturnEmptyName) {
verify(EngineGroupType::MaxEngineGroups, "");
}
// Creates a mock device from a customised copy of the default hardware info and
// checks that the device's reported maxWorkGroupSize matches a value derived
// from the system-info fields set below.
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThenMaxWorkGroupCalculatedCorrectly) {
// Work on a local copy so the shared default hardware info is not modified.
HardwareInfo myHwInfo = *defaultHwInfo;
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
PLATFORM &myPlatform = myHwInfo.platform;
// Topology used by the test: 16 EUs, 4 subslices, 2 dual subslices,
// and 16 * 8 = 128 threads in total (i.e. 8 threads per EU).
mySysInfo.EUCount = 16;
mySysInfo.SubSliceCount = 4;
mySysInfo.DualSubSliceCount = 2;
mySysInfo.ThreadCount = 16 * 8;
// NOTE(review): the meaning of revision id 0x4 is not visible here; presumably it
// selects a stepping relevant to a revision-based workaround - confirm.
myPlatform.usRevId = 0x4;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
auto minSimd = 8;
// Expected value = threads per EU * EUs per subslice * minSimd
//                = (128 / 16) * (16 / 4) * 8 = 256.
// Note that the expectation is based on SubSliceCount, not DualSubSliceCount.
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd;
EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize);
}

View File

@ -2515,7 +2515,6 @@ HWTEST_F(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedTh
mySysInfo.EUCount = 24;
mySysInfo.SubSliceCount = 3;
mySysInfo.DualSubSliceCount = 3;
mySysInfo.ThreadCount = 24 * 7;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
@ -3167,8 +3166,7 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi
std::unique_ptr<MockKernel> pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice));
auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
auto deviceInfo = pClDevice->getDevice().getDeviceInfo();
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(pKernel->getHardwareInfo(), static_cast<uint32_t>(pClDevice->getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize);
EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG);

View File

@ -107,14 +107,9 @@ void Device::initializeCaps() {
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
? (systemInfo.EUCount / systemInfo.SubSliceCount)
: systemInfo.EuCountPerPoolMin;
deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
? (systemInfo.EUCount / systemInfo.DualSubSliceCount)
: systemInfo.EuCountPerPoolMin;
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
deviceInfo.threadsPerEUConfigs = hwHelper.getThreadsPerEUConfigs();
auto maxWS = hwHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed;
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
maxWS = Math::prevPowerOfTwo(maxWS);
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);

View File

@ -26,7 +26,6 @@ struct DeviceInfo {
size_t imageMaxArraySize;
size_t imageMaxBufferSize;
size_t maxNumEUsPerSubSlice;
size_t maxNumEUsPerDualSubSlice;
size_t maxParameterSize;
size_t maxWorkGroupSize;
size_t maxWorkItemSizes[3];

View File

@ -97,7 +97,6 @@ class HwHelper {
virtual std::string getExtensions() const = 0;
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0;
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
@ -212,7 +211,7 @@ class HwHelperHw : public HwHelper {
size_t getPaddingForISAAllocation() const override;
uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override;
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;

View File

@ -111,7 +111,7 @@ uint32_t HwHelperHw<GfxFamily>::getPlanarYuvMaxHeight() const {
}
// Returns the result of the base-class HwHelper::getMaxThreadsForWorkgroup for
// the given hardware info and per-subslice EU count, without any adjustment.
// NOTE(review): two signature lines appear back to back below; this looks like
// the old (getMaxThreadsForWorkgroupInDSSOrSS, three parameters) and the new
// (getMaxThreadsForWorkgroup, two parameters) version of the same definition
// rendered without diff markers - confirm which one is present in the file.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}

View File

@ -194,11 +194,11 @@ inline bool HwHelperHw<GfxFamily>::preferSmallWorkgroupSizeForKernel(const size_
}
// Returns the maximum number of threads for a work group as computed by the
// base-class HwHelper::getMaxThreadsForWorkgroup, capped at 64 when
// isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo) reports true.
// NOTE(review): the signature and both return statements appear in pairs that
// differ only in the function name / EU-count argument
// (maxNumEUsPerDualSubSlice vs. maxNumEUsPerSubSlice); this looks like old and
// new lines of a diff rendered without markers - confirm which set is present
// in the file.
template <typename GfxFamily>
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
// Workaround path: clamp the base result to at most 64 threads.
if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) {
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u);
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice), 64u);
}
// No workaround needed: use the base result unchanged.
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice);
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
}
} // namespace NEO