From aed3fada287ce1d87430ab77bfe39df58a9da8fe Mon Sep 17 00:00:00 2001 From: Maciej Plewka Date: Fri, 2 Jul 2021 16:30:11 +0000 Subject: [PATCH] Revert "Use Eu per dss to callculate max work group size" This reverts commit 86b12dd23979db12e1898013c9162cb7106e40f1. Signed-off-by: Maciej Plewka --- opencl/source/kernel/kernel.cpp | 3 +-- .../test/unit_test/device/device_caps_tests.cpp | 17 ----------------- opencl/test/unit_test/kernel/kernel_tests.cpp | 4 +--- shared/source/device/device_caps.cpp | 7 +------ shared/source/device/device_info.h | 1 - shared/source/helpers/hw_helper.h | 3 +-- shared/source/helpers/hw_helper_bdw_plus.inl | 2 +- shared/source/helpers/hw_helper_xehp_plus.inl | 6 +++--- 8 files changed, 8 insertions(+), 35 deletions(-) diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index b75abcc7e2..0cecffd3e7 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -77,8 +77,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c program->retainForKernel(); imageTransformer.reset(new ImageTransformer); if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) { - auto deviceInfo = getDevice().getDevice().getDeviceInfo(); - maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); + maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(getHardwareInfo(), static_cast(getDevice().getDevice().getDeviceInfo().maxNumEUsPerSubSlice)); } else { maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); } diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index b7c6a61e88..92fe1c9e0b 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -1685,20 +1685,3 @@ HWTEST_F(QueueFamilyNameTest, givenBcsWhenGettingQueueFamilyNameThenReturnProper HWTEST_F(QueueFamilyNameTest, givenInvalidEngineGroupWhenGettingQueueFamilyNameThenReturnEmptyName) { verify(EngineGroupType::MaxEngineGroups, ""); } -HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThenMaxWorkGroupCalculatedCorrectly) { - HardwareInfo myHwInfo = *defaultHwInfo; - GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; - PLATFORM &myPlatform = myHwInfo.platform; - - mySysInfo.EUCount = 16; - mySysInfo.SubSliceCount = 4; - mySysInfo.DualSubSliceCount = 2; - mySysInfo.ThreadCount = 16 * 8; - myPlatform.usRevId = 0x4; - auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); - auto minSimd = 8; - - auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd; - - EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize); -} \ No newline at end of file diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 880c00341b..2a6ad3f9fd 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -2515,7 +2515,6 @@ HWTEST_F(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedTh mySysInfo.EUCount = 24; mySysInfo.SubSliceCount = 3; - mySysInfo.DualSubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); @@ -3167,8 +3166,7 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi std::unique_ptr pKernel(new MockKernel(pProgram, *pKernelInfo, *pClDevice)); auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; - auto deviceInfo = pClDevice->getDevice().getDeviceInfo(); - auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); + auto maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroup(pKernel->getHardwareInfo(), static_cast(pClDevice->getDevice().getDeviceInfo().maxNumEUsPerSubSlice)); EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize); EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG); diff --git a/shared/source/device/device_caps.cpp b/shared/source/device/device_caps.cpp index 7673f2f26f..188335aa97 100644 --- a/shared/source/device/device_caps.cpp +++ b/shared/source/device/device_caps.cpp @@ -107,14 +107,9 @@ void Device::initializeCaps() { deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0) ? (systemInfo.EUCount / systemInfo.SubSliceCount) : systemInfo.EuCountPerPoolMin; - - deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.ftrPooledEuEnabled == 0) - ? (systemInfo.EUCount / systemInfo.DualSubSliceCount) - : systemInfo.EuCountPerPoolMin; - deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount; deviceInfo.threadsPerEUConfigs = hwHelper.getThreadsPerEUConfigs(); - auto maxWS = hwHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed; + auto maxWS = hwHelper.getMaxThreadsForWorkgroup(hwInfo, static_cast(deviceInfo.maxNumEUsPerSubSlice)) * simdSizeUsed; maxWS = Math::prevPowerOfTwo(maxWS); deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u); diff --git a/shared/source/device/device_info.h b/shared/source/device/device_info.h index 901d16a375..8670f69856 100644 --- a/shared/source/device/device_info.h +++ b/shared/source/device/device_info.h @@ -26,7 +26,6 @@ struct DeviceInfo { size_t imageMaxArraySize; size_t imageMaxBufferSize; size_t maxNumEUsPerSubSlice; - size_t maxNumEUsPerDualSubSlice; size_t maxParameterSize; size_t maxWorkGroupSize; size_t maxWorkItemSizes[3]; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 211f55ca40..f8c7d5a468 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -97,7 +97,6 @@ class HwHelper { virtual std::string getExtensions() const = 0; static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo); virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const; - virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0; virtual uint32_t getMetricsLibraryGenId() const = 0; virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0; virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0; @@ -212,7 +211,7 @@ class HwHelperHw : public HwHelper { size_t getPaddingForISAAllocation() const override; - uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override; + uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override; uint32_t getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override; diff --git a/shared/source/helpers/hw_helper_bdw_plus.inl b/shared/source/helpers/hw_helper_bdw_plus.inl index a9bbf7f02d..36e5bcb187 100644 --- a/shared/source/helpers/hw_helper_bdw_plus.inl +++ b/shared/source/helpers/hw_helper_bdw_plus.inl @@ -111,7 +111,7 @@ uint32_t HwHelperHw::getPlanarYuvMaxHeight() const { } template -uint32_t HwHelperHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { +uint32_t HwHelperHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice); } diff --git a/shared/source/helpers/hw_helper_xehp_plus.inl b/shared/source/helpers/hw_helper_xehp_plus.inl index 0e685d7706..bee46219b9 100644 --- a/shared/source/helpers/hw_helper_xehp_plus.inl +++ b/shared/source/helpers/hw_helper_xehp_plus.inl @@ -194,11 +194,11 @@ inline bool HwHelperHw::preferSmallWorkgroupSizeForKernel(const size_ } template -inline uint32_t HwHelperHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { +inline uint32_t HwHelperHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) { - return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u); + return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice), 64u); } - return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice); + return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice); } } // namespace NEO