Use DSS count to callculate max work group size
This reverts commit 89b7a4c9cc0a69cfd2eb5a0bd138356f34c52f0d. Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
233aab88e7
commit
bc121c09fa
|
@ -77,7 +77,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c
|
|||
program->retainForKernel();
|
||||
imageTransformer.reset(new ImageTransformer);
|
||||
if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) {
|
||||
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(getHardwareInfo(), static_cast<uint32_t>(getDevice().getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
|
||||
auto deviceInfo = getDevice().getDevice().getDeviceInfo();
|
||||
maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
|
||||
} else {
|
||||
maxKernelWorkGroupSize = static_cast<uint32_t>(clDevice.getSharedDeviceInfo().maxWorkGroupSize);
|
||||
}
|
||||
|
|
|
@ -1685,3 +1685,50 @@ HWTEST_F(QueueFamilyNameTest, givenBcsWhenGettingQueueFamilyNameThenReturnProper
|
|||
HWTEST_F(QueueFamilyNameTest, givenInvalidEngineGroupWhenGettingQueueFamilyNameThenReturnEmptyName) {
|
||||
verify(EngineGroupType::MaxEngineGroups, "");
|
||||
}
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThenMaxWorkGroupCalculatedCorrectly) {
|
||||
HardwareInfo myHwInfo = *defaultHwInfo;
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
PLATFORM &myPlatform = myHwInfo.platform;
|
||||
|
||||
mySysInfo.EUCount = 16;
|
||||
mySysInfo.SubSliceCount = 4;
|
||||
mySysInfo.DualSubSliceCount = 2;
|
||||
mySysInfo.ThreadCount = 16 * 8;
|
||||
myPlatform.usRevId = 0x4;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
auto minSimd = 8;
|
||||
|
||||
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd;
|
||||
|
||||
EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize);
|
||||
}
|
||||
|
||||
HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroWhenDeviceCreatedThenDualSubSliceCountIsDifferentThanSubSliceCount) {
|
||||
HardwareInfo myHwInfo = *defaultHwInfo;
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
PLATFORM &myPlatform = myHwInfo.platform;
|
||||
|
||||
mySysInfo.EUCount = 16;
|
||||
mySysInfo.SubSliceCount = 4;
|
||||
mySysInfo.DualSubSliceCount = 2;
|
||||
mySysInfo.ThreadCount = 16 * 8;
|
||||
myPlatform.usRevId = 0x4;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
EXPECT_NE(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
|
||||
}
|
||||
|
||||
HWTEST_F(DeviceGetCapsTest, givenDSSCountEqualZeroWhenDeviceCreatedThenMaxEuPerDSSEqualMaxEuPerSS) {
|
||||
HardwareInfo myHwInfo = *defaultHwInfo;
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
PLATFORM &myPlatform = myHwInfo.platform;
|
||||
|
||||
mySysInfo.EUCount = 16;
|
||||
mySysInfo.SubSliceCount = 4;
|
||||
mySysInfo.DualSubSliceCount = 0;
|
||||
mySysInfo.ThreadCount = 16 * 8;
|
||||
myPlatform.usRevId = 0x4;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
EXPECT_EQ(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
|
||||
}
|
||||
|
|
|
@ -2515,6 +2515,7 @@ HWTEST_F(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedTh
|
|||
|
||||
mySysInfo.EUCount = 24;
|
||||
mySysInfo.SubSliceCount = 3;
|
||||
mySysInfo.DualSubSliceCount = 3;
|
||||
mySysInfo.ThreadCount = 24 * 7;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
|
@ -3166,7 +3167,8 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi
|
|||
std::unique_ptr<MockKernel> pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice));
|
||||
|
||||
auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(pKernel->getHardwareInfo(), static_cast<uint32_t>(pClDevice->getDevice().getDeviceInfo().maxNumEUsPerSubSlice));
|
||||
auto deviceInfo = pClDevice->getDevice().getDeviceInfo();
|
||||
auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice));
|
||||
|
||||
EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize);
|
||||
EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG);
|
||||
|
|
|
@ -107,9 +107,17 @@ void Device::initializeCaps() {
|
|||
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / systemInfo.SubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
if (systemInfo.DualSubSliceCount != 0) {
|
||||
deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / systemInfo.DualSubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
|
||||
} else {
|
||||
deviceInfo.maxNumEUsPerDualSubSlice = deviceInfo.maxNumEUsPerSubSlice;
|
||||
}
|
||||
deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount;
|
||||
deviceInfo.threadsPerEUConfigs = hwHelper.getThreadsPerEUConfigs();
|
||||
auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed;
|
||||
auto maxWS = hwHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast<uint32_t>(deviceInfo.maxNumEUsPerSubSlice), static_cast<uint32_t>(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed;
|
||||
|
||||
maxWS = Math::prevPowerOfTwo(maxWS);
|
||||
deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u);
|
||||
|
|
|
@ -26,6 +26,7 @@ struct DeviceInfo {
|
|||
size_t imageMaxArraySize;
|
||||
size_t imageMaxBufferSize;
|
||||
size_t maxNumEUsPerSubSlice;
|
||||
size_t maxNumEUsPerDualSubSlice;
|
||||
size_t maxParameterSize;
|
||||
size_t maxWorkGroupSize;
|
||||
size_t maxWorkItemSizes[3];
|
||||
|
|
|
@ -97,6 +97,7 @@ class HwHelper {
|
|||
virtual std::string getExtensions() const = 0;
|
||||
static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo);
|
||||
virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const;
|
||||
virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0;
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
||||
|
@ -211,7 +212,7 @@ class HwHelperHw : public HwHelper {
|
|||
|
||||
size_t getPaddingForISAAllocation() const override;
|
||||
|
||||
uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override;
|
||||
uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override;
|
||||
|
||||
uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override;
|
||||
|
||||
|
|
|
@ -111,7 +111,7 @@ uint32_t HwHelperHw<GfxFamily>::getPlanarYuvMaxHeight() const {
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
|
||||
}
|
||||
|
||||
|
|
|
@ -194,11 +194,11 @@ inline bool HwHelperHw<GfxFamily>::preferSmallWorkgroupSizeForKernel(const size_
|
|||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const {
|
||||
inline uint32_t HwHelperHw<GfxFamily>::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const {
|
||||
if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) {
|
||||
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice), 64u);
|
||||
return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u);
|
||||
}
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice);
|
||||
return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
Loading…
Reference in New Issue