Revert "Use DualSubSliceCount to calculate workgroup size"

This reverts commit 8ea5bbd3b5.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2022-06-28 11:11:41 +02:00
committed by Compute-Runtime-Automation
parent 2cc2d05c37
commit b0d4215328
8 changed files with 9 additions and 49 deletions

View File

@@ -299,7 +299,6 @@ int main(int argc, char **argv) {
// clang-format off
gtSystemInfo.SliceCount = sliceCount;
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);

View File

@@ -301,15 +301,6 @@ TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWh
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedAndDSSCountEqualZeroWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
auto workGroupSize = lws[0] * lws[1] * lws[2];
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
auto mutableHwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
usesBarriers = 1;
auto expected = dssCount * (maxBarrierCount / usesBarriers);

View File

@@ -59,8 +59,6 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
size_t globalWorkOffset[] = {0, 0, 0};
size_t localWorkSize[] = {8, 8, 8};
size_t maxConcurrentWorkGroupCount = 0;
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
const_cast<KernelInfo &>(pKernel->getKernelInfo()).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize,

View File

@@ -1096,7 +1096,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
mySysInfo.EUCount = 24;
mySysInfo.DualSubSliceCount = 3;
mySysInfo.SubSliceCount = 3;
mySysInfo.ThreadCount = 24 * 7;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
@@ -1112,7 +1112,7 @@ HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxW
auto &hwHelper = HwHelper::get(myHwInfo.platform.eRenderCoreFamily);
mySysInfo.EUCount = 32;
mySysInfo.DualSubSliceCount = 2;
mySysInfo.SubSliceCount = 2;
mySysInfo.ThreadCount = 32 * hwHelper.getMinimalSIMDSize(); // 128 threads per subslice, in simd 8 gives 1024
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
@@ -1504,18 +1504,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThen
PLATFORM &myPlatform = myHwInfo.platform;
mySysInfo.EUCount = 16;
mySysInfo.SubSliceCount = 4;
mySysInfo.DualSubSliceCount = 2;
mySysInfo.ThreadCount = 16 * 8;
myPlatform.usRevId = 0x4;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
auto minSimd = 8;
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.DualSubSliceCount) * minSimd;
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd;
EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize);
}
HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCountWhenDeviceCreatedThenDualSubSliceCountIsSameAsSubSliceCount) {
HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroWhenDeviceCreatedThenDualSubSliceCountIsDifferentThanSubSliceCount) {
HardwareInfo myHwInfo = *defaultHwInfo;
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
PLATFORM &myPlatform = myHwInfo.platform;
@@ -1527,7 +1528,7 @@ HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCou
myPlatform.usRevId = 0x4;
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
EXPECT_EQ(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
EXPECT_NE(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
}
HWTEST_F(DeviceGetCapsTest, givenDSSCountEqualZeroWhenDeviceCreatedThenMaxEuPerDSSEqualMaxEuPerSS) {

View File

@@ -317,7 +317,6 @@ int main(int argc, char **argv) {
// clang-format off
gtSystemInfo.SliceCount = sliceCount;
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);

View File

@@ -118,14 +118,12 @@ void Device::initializeCaps() {
? CommonConstants::maximalSimdSize
: hwHelper.getMinimalSIMDSize();
uint32_t dualSubsliceCount = systemInfo.DualSubSliceCount == 0 ? systemInfo.SubSliceCount : systemInfo.DualSubSliceCount;
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0)
? (systemInfo.EUCount / dualSubsliceCount)
? (systemInfo.EUCount / systemInfo.SubSliceCount)
: systemInfo.EuCountPerPoolMin;
if (dualSubsliceCount != 0) {
if (systemInfo.DualSubSliceCount != 0) {
deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0)
? (systemInfo.EUCount / dualSubsliceCount)
? (systemInfo.EUCount / systemInfo.DualSubSliceCount)
: systemInfo.EuCountPerPoolMin;
} else {

View File

@@ -118,31 +118,6 @@ TEST_F(DeviceGetCapsTest, givenMockCompilerInterfaceWhenInitializeCapsIsCalledTh
EXPECT_EQ(1u, pDevice->getDeviceInfo().maxParameterSize);
}
TEST_F(DeviceGetCapsTest, whenInitializeCapsIsCalledWithDSSCountSetToZeroThenMaxWorkGroupSizeIsTheSame) {
pDevice->initializeCaps();
auto maxWorkGroupSizeBefore = pDevice->getDeviceInfo().maxWorkGroupSize;
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
pDevice->initializeCaps();
auto maxWorkGroupSizeAfter = pDevice->getDeviceInfo().maxWorkGroupSize;
EXPECT_EQ(maxWorkGroupSizeBefore, maxWorkGroupSizeAfter);
}
TEST_F(DeviceGetCapsTest, givenSSCountAndDSSCountEqualToZeroAndEuCountPerPoolMinIsSetThenMaxNumEUsPerSliceIsSetAccordingly) {
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
mutableHwInfo->gtSystemInfo.EuCountPerPoolMin = 16;
mutableHwInfo->featureTable.flags.ftrPooledEuEnabled = 1;
mutableHwInfo->gtSystemInfo.SubSliceCount = 0;
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
pDevice->initializeCaps();
EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerSubSlice, 16ul);
EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerDualSubSlice, 16ul);
}
TEST_F(DeviceGetCapsTest,
givenImplicitScalingWhenInitializeCapsIsCalledThenMaxMemAllocSizeIsSetCorrectly) {
DebugManagerStateRestore dbgRestorer;

View File

@@ -302,7 +302,6 @@ int main(int argc, char **argv) {
// clang-format off
gtSystemInfo.SliceCount = sliceCount;
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);