From b0d4215328ac8b8bffc38252cef93aff29511d6e Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Tue, 28 Jun 2022 11:11:41 +0200 Subject: [PATCH] Revert "Use DualSubSliceCount to calculate workgroup size" This reverts commit 8ea5bbd3b5df7aa6df5527f66c5222afb99f872f. Signed-off-by: Compute-Runtime-Validation --- level_zero/core/test/unit_tests/main.cpp | 1 - .../sources/kernel/test_function.cpp | 9 ------- ...oncurrent_work_group_count_intel_tests.inl | 2 -- .../unit_test/device/device_caps_tests.cpp | 11 ++++---- opencl/test/unit_test/main.cpp | 1 - shared/source/device/device_caps.cpp | 8 +++--- .../unit_test/device/neo_device_tests.cpp | 25 ------------------- shared/test/unit_test/main.cpp | 1 - 8 files changed, 9 insertions(+), 49 deletions(-) diff --git a/level_zero/core/test/unit_tests/main.cpp b/level_zero/core/test/unit_tests/main.cpp index f75781ba2e..2174ca06a0 100644 --- a/level_zero/core/test/unit_tests/main.cpp +++ b/level_zero/core/test/unit_tests/main.cpp @@ -299,7 +299,6 @@ int main(int argc, char **argv) { // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; - gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_function.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_function.cpp index a63c69a595..f5fc568bbf 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_function.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_function.cpp @@ -301,15 +301,6 @@ TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWh EXPECT_EQ(expected, getMaxWorkGroupCount()); } -TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedAndDSSCountEqualZeroWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) { - auto workGroupSize = lws[0] * lws[1] * lws[2]; - auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd); - auto mutableHwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); - mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0; - - EXPECT_EQ(expected, getMaxWorkGroupCount()); -} - TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) { usesBarriers = 1; auto expected = dssCount * (maxBarrierCount / usesBarriers); diff --git a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl index d05b5b953a..1d788e00eb 100644 --- a/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl +++ b/opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl @@ -59,8 +59,6 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting size_t globalWorkOffset[] = {0, 0, 0}; size_t localWorkSize[] = {8, 8, 8}; size_t maxConcurrentWorkGroupCount = 0; - auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); - mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0; const_cast(pKernel->getKernelInfo()).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber; retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize, diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index 9ca2133675..174752463d 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -1096,7 +1096,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; mySysInfo.EUCount = 24; - mySysInfo.DualSubSliceCount = 3; + mySysInfo.SubSliceCount = 3; mySysInfo.ThreadCount = 24 * 7; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); @@ -1112,7 +1112,7 @@ HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxW auto &hwHelper = HwHelper::get(myHwInfo.platform.eRenderCoreFamily); mySysInfo.EUCount = 32; - mySysInfo.DualSubSliceCount = 2; + mySysInfo.SubSliceCount = 2; mySysInfo.ThreadCount = 32 * hwHelper.getMinimalSIMDSize(); // 128 threads per subslice, in simd 8 gives 1024 auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); @@ -1504,18 +1504,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThen PLATFORM &myPlatform = myHwInfo.platform; mySysInfo.EUCount = 16; + mySysInfo.SubSliceCount = 4; mySysInfo.DualSubSliceCount = 2; mySysInfo.ThreadCount = 16 * 8; myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); auto minSimd = 8; - auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.DualSubSliceCount) * minSimd; + auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd; EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize); } -HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCountWhenDeviceCreatedThenDualSubSliceCountIsSameAsSubSliceCount) { +HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroWhenDeviceCreatedThenDualSubSliceCountIsDifferentThanSubSliceCount) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo; PLATFORM &myPlatform = myHwInfo.platform; @@ -1527,7 +1528,7 @@ HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCou myPlatform.usRevId = 0x4; auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(&myHwInfo)); - EXPECT_EQ(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice); + EXPECT_NE(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice); } HWTEST_F(DeviceGetCapsTest, givenDSSCountEqualZeroWhenDeviceCreatedThenMaxEuPerDSSEqualMaxEuPerSS) { diff --git a/opencl/test/unit_test/main.cpp b/opencl/test/unit_test/main.cpp index 67ce6384a3..be34436f7e 100644 --- a/opencl/test/unit_test/main.cpp +++ b/opencl/test/unit_test/main.cpp @@ -317,7 +317,6 @@ int main(int argc, char **argv) { // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; - gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice); diff --git a/shared/source/device/device_caps.cpp b/shared/source/device/device_caps.cpp index 14a7fbcb01..99de6c00d3 100644 --- a/shared/source/device/device_caps.cpp +++ b/shared/source/device/device_caps.cpp @@ -118,14 +118,12 @@ void Device::initializeCaps() { ? CommonConstants::maximalSimdSize : hwHelper.getMinimalSIMDSize(); - uint32_t dualSubsliceCount = systemInfo.DualSubSliceCount == 0 ? systemInfo.SubSliceCount : systemInfo.DualSubSliceCount; - deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0) - ? (systemInfo.EUCount / dualSubsliceCount) + ? (systemInfo.EUCount / systemInfo.SubSliceCount) : systemInfo.EuCountPerPoolMin; - if (dualSubsliceCount != 0) { + if (systemInfo.DualSubSliceCount != 0) { deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0) - ? (systemInfo.EUCount / dualSubsliceCount) + ? (systemInfo.EUCount / systemInfo.DualSubSliceCount) : systemInfo.EuCountPerPoolMin; } else { diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 9e470914d1..5975270cee 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -118,31 +118,6 @@ TEST_F(DeviceGetCapsTest, givenMockCompilerInterfaceWhenInitializeCapsIsCalledTh EXPECT_EQ(1u, pDevice->getDeviceInfo().maxParameterSize); } -TEST_F(DeviceGetCapsTest, whenInitializeCapsIsCalledWithDSSCountSetToZeroThenMaxWorkGroupSizeIsTheSame) { - pDevice->initializeCaps(); - auto maxWorkGroupSizeBefore = pDevice->getDeviceInfo().maxWorkGroupSize; - - auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); - mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0; - pDevice->initializeCaps(); - auto maxWorkGroupSizeAfter = pDevice->getDeviceInfo().maxWorkGroupSize; - - EXPECT_EQ(maxWorkGroupSizeBefore, maxWorkGroupSizeAfter); -} - -TEST_F(DeviceGetCapsTest, givenSSCountAndDSSCountEqualToZeroAndEuCountPerPoolMinIsSetThenMaxNumEUsPerSliceIsSetAccordingly) { - auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); - mutableHwInfo->gtSystemInfo.EuCountPerPoolMin = 16; - mutableHwInfo->featureTable.flags.ftrPooledEuEnabled = 1; - mutableHwInfo->gtSystemInfo.SubSliceCount = 0; - mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0; - - pDevice->initializeCaps(); - - EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerSubSlice, 16ul); - EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerDualSubSlice, 16ul); -} - TEST_F(DeviceGetCapsTest, givenImplicitScalingWhenInitializeCapsIsCalledThenMaxMemAllocSizeIsSetCorrectly) { DebugManagerStateRestore dbgRestorer; diff --git a/shared/test/unit_test/main.cpp b/shared/test/unit_test/main.cpp index 8d8f787c26..02fc2a70b7 100644 --- a/shared/test/unit_test/main.cpp +++ b/shared/test/unit_test/main.cpp @@ -302,7 +302,6 @@ int main(int argc, char **argv) { // clang-format off gtSystemInfo.SliceCount = sliceCount; gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount; - gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount; gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery; gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu; gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);