mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Revert "Use DualSubSliceCount to calculate workgroup size"
This reverts commit 8ea5bbd3b5.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
2cc2d05c37
commit
b0d4215328
@ -299,7 +299,6 @@ int main(int argc, char **argv) {
|
||||
// clang-format off
|
||||
gtSystemInfo.SliceCount = sliceCount;
|
||||
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
|
||||
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
|
||||
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
|
||||
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
|
||||
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
|
||||
|
@ -301,15 +301,6 @@ TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWh
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedAndDSSCountEqualZeroWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
|
||||
auto mutableHwInfo = neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
|
||||
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
TEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
usesBarriers = 1;
|
||||
auto expected = dssCount * (maxBarrierCount / usesBarriers);
|
||||
|
@ -59,8 +59,6 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
|
||||
size_t globalWorkOffset[] = {0, 0, 0};
|
||||
size_t localWorkSize[] = {8, 8, 8};
|
||||
size_t maxConcurrentWorkGroupCount = 0;
|
||||
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
|
||||
const_cast<KernelInfo &>(pKernel->getKernelInfo()).kernelDescriptor.kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, pMultiDeviceKernel, workDim, globalWorkOffset, localWorkSize,
|
||||
|
@ -1096,7 +1096,7 @@ TEST(DeviceGetCaps, givenDebugFlagToUseMaxSimdSizeForWkgCalculationWhenDeviceCap
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
|
||||
mySysInfo.EUCount = 24;
|
||||
mySysInfo.DualSubSliceCount = 3;
|
||||
mySysInfo.SubSliceCount = 3;
|
||||
mySysInfo.ThreadCount = 24 * 7;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
@ -1112,7 +1112,7 @@ HWTEST_F(DeviceGetCapsTest, givenDeviceThatHasHighNumberOfExecutionUnitsWhenMaxW
|
||||
auto &hwHelper = HwHelper::get(myHwInfo.platform.eRenderCoreFamily);
|
||||
|
||||
mySysInfo.EUCount = 32;
|
||||
mySysInfo.DualSubSliceCount = 2;
|
||||
mySysInfo.SubSliceCount = 2;
|
||||
mySysInfo.ThreadCount = 32 * hwHelper.getMinimalSIMDSize(); // 128 threads per subslice, in simd 8 gives 1024
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
@ -1504,18 +1504,19 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DeviceGetCapsTest, givenSysInfoWhenDeviceCreatedThen
|
||||
PLATFORM &myPlatform = myHwInfo.platform;
|
||||
|
||||
mySysInfo.EUCount = 16;
|
||||
mySysInfo.SubSliceCount = 4;
|
||||
mySysInfo.DualSubSliceCount = 2;
|
||||
mySysInfo.ThreadCount = 16 * 8;
|
||||
myPlatform.usRevId = 0x4;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
auto minSimd = 8;
|
||||
|
||||
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.DualSubSliceCount) * minSimd;
|
||||
auto expectedWG = (mySysInfo.ThreadCount / mySysInfo.EUCount) * (mySysInfo.EUCount / mySysInfo.SubSliceCount) * minSimd;
|
||||
|
||||
EXPECT_EQ(expectedWG, device->sharedDeviceInfo.maxWorkGroupSize);
|
||||
}
|
||||
|
||||
HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCountWhenDeviceCreatedThenDualSubSliceCountIsSameAsSubSliceCount) {
|
||||
HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroWhenDeviceCreatedThenDualSubSliceCountIsDifferentThanSubSliceCount) {
|
||||
HardwareInfo myHwInfo = *defaultHwInfo;
|
||||
GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;
|
||||
PLATFORM &myPlatform = myHwInfo.platform;
|
||||
@ -1527,7 +1528,7 @@ HWTEST_F(DeviceGetCapsTest, givenDSSDifferentThanZeroAndDifferentThanSubSliceCou
|
||||
myPlatform.usRevId = 0x4;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&myHwInfo));
|
||||
|
||||
EXPECT_EQ(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
|
||||
EXPECT_NE(device->sharedDeviceInfo.maxNumEUsPerSubSlice, device->sharedDeviceInfo.maxNumEUsPerDualSubSlice);
|
||||
}
|
||||
|
||||
HWTEST_F(DeviceGetCapsTest, givenDSSCountEqualZeroWhenDeviceCreatedThenMaxEuPerDSSEqualMaxEuPerSS) {
|
||||
|
@ -317,7 +317,6 @@ int main(int argc, char **argv) {
|
||||
// clang-format off
|
||||
gtSystemInfo.SliceCount = sliceCount;
|
||||
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
|
||||
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
|
||||
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
|
||||
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
|
||||
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
|
||||
|
@ -118,14 +118,12 @@ void Device::initializeCaps() {
|
||||
? CommonConstants::maximalSimdSize
|
||||
: hwHelper.getMinimalSIMDSize();
|
||||
|
||||
uint32_t dualSubsliceCount = systemInfo.DualSubSliceCount == 0 ? systemInfo.SubSliceCount : systemInfo.DualSubSliceCount;
|
||||
|
||||
deviceInfo.maxNumEUsPerSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / dualSubsliceCount)
|
||||
? (systemInfo.EUCount / systemInfo.SubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
if (dualSubsliceCount != 0) {
|
||||
if (systemInfo.DualSubSliceCount != 0) {
|
||||
deviceInfo.maxNumEUsPerDualSubSlice = (systemInfo.EuCountPerPoolMin == 0 || hwInfo.featureTable.flags.ftrPooledEuEnabled == 0)
|
||||
? (systemInfo.EUCount / dualSubsliceCount)
|
||||
? (systemInfo.EUCount / systemInfo.DualSubSliceCount)
|
||||
: systemInfo.EuCountPerPoolMin;
|
||||
|
||||
} else {
|
||||
|
@ -118,31 +118,6 @@ TEST_F(DeviceGetCapsTest, givenMockCompilerInterfaceWhenInitializeCapsIsCalledTh
|
||||
EXPECT_EQ(1u, pDevice->getDeviceInfo().maxParameterSize);
|
||||
}
|
||||
|
||||
TEST_F(DeviceGetCapsTest, whenInitializeCapsIsCalledWithDSSCountSetToZeroThenMaxWorkGroupSizeIsTheSame) {
|
||||
pDevice->initializeCaps();
|
||||
auto maxWorkGroupSizeBefore = pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
|
||||
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
|
||||
pDevice->initializeCaps();
|
||||
auto maxWorkGroupSizeAfter = pDevice->getDeviceInfo().maxWorkGroupSize;
|
||||
|
||||
EXPECT_EQ(maxWorkGroupSizeBefore, maxWorkGroupSizeAfter);
|
||||
}
|
||||
|
||||
TEST_F(DeviceGetCapsTest, givenSSCountAndDSSCountEqualToZeroAndEuCountPerPoolMinIsSetThenMaxNumEUsPerSliceIsSetAccordingly) {
|
||||
auto mutableHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();
|
||||
mutableHwInfo->gtSystemInfo.EuCountPerPoolMin = 16;
|
||||
mutableHwInfo->featureTable.flags.ftrPooledEuEnabled = 1;
|
||||
mutableHwInfo->gtSystemInfo.SubSliceCount = 0;
|
||||
mutableHwInfo->gtSystemInfo.DualSubSliceCount = 0;
|
||||
|
||||
pDevice->initializeCaps();
|
||||
|
||||
EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerSubSlice, 16ul);
|
||||
EXPECT_EQ(pDevice->getDeviceInfo().maxNumEUsPerDualSubSlice, 16ul);
|
||||
}
|
||||
|
||||
TEST_F(DeviceGetCapsTest,
|
||||
givenImplicitScalingWhenInitializeCapsIsCalledThenMaxMemAllocSizeIsSetCorrectly) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
|
@ -302,7 +302,6 @@ int main(int argc, char **argv) {
|
||||
// clang-format off
|
||||
gtSystemInfo.SliceCount = sliceCount;
|
||||
gtSystemInfo.SubSliceCount = gtSystemInfo.SliceCount * subSlicePerSliceCount;
|
||||
gtSystemInfo.DualSubSliceCount = gtSystemInfo.SubSliceCount;
|
||||
gtSystemInfo.EUCount = gtSystemInfo.SubSliceCount * euPerSubSlice - dieRecovery;
|
||||
gtSystemInfo.ThreadCount = gtSystemInfo.EUCount * threadsPerEu;
|
||||
gtSystemInfo.MaxEuPerSubSlice = std::max(gtSystemInfo.MaxEuPerSubSlice, euPerSubSlice);
|
||||
|
Reference in New Issue
Block a user