Disable EU fusion based on kernel properties from compiler

Related-To: NEO-6633

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2022-02-10 23:33:40 +00:00
committed by Compute-Runtime-Automation
parent 13bc2300e1
commit cf1bc3a2ba
37 changed files with 228 additions and 95 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -17,6 +17,26 @@
using namespace NEO;
// Builds a WorkSizeInfo for a kernel that uses barriers, passing true for the
// disableEUFusion argument, and checks the resulting minWorkGroupSize.
// The expected value is simdSize * numThreadsPerSubS divided by the barrier
// limit, which this test takes as 32 for core families >= IGFX_GEN9_CORE and
// 16 otherwise.
TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectMinWorkGroupSizeIsSet) {
    const uint32_t simd = 8u;
    const uint32_t threadsPerSubSlice = 8u;
    auto *hwInfo = defaultHwInfo.get();

    WorkSizeInfo wsInfo(256,                // maxWorkGroupSize
                        1u,                 // hasBarriers
                        simd,               // simdSize
                        0u,                 // slmTotalSize
                        hwInfo,             // hardwareInfo
                        threadsPerSubSlice, // numThreadsPerSubS
                        0u,                 // localMemorySize
                        false,              // imgUsed
                        false,              // yTiledSurface
                        true);              // disableEUFusion

    // Pick the barrier limit from the core family of the default hardware info.
    uint32_t barrierLimit = 16;
    if (hwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) {
        barrierLimit = 32;
    }

    const uint32_t expectedMinimum = simd * threadsPerSubSlice / barrierLimit;
    EXPECT_EQ(expectedMinimum, wsInfo.minWorkGroupSize);
}
TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) {
WorkSizeInfo wsInfo(256, // maxWorkGroupSize
1u, // hasBariers
@ -26,7 +46,8 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCal
32u, // numThreadsPerSubSlice
0u, // localMemorySize
false, // imgUsed
false // yTiledSurface
false, // yTiledSurface
false // disableEUFusion
);
uint32_t workDim = 3;
@ -56,7 +77,8 @@ TEST(localWorkSizeTest, givenSmallerLocalMemSizeThanSlmTotalSizeThenExceptionIsT
32u, // numThreadsPerSubSlice
64u, // localMemorySize
false, // imgUsed
false // yTiledSurface
false, // yTiledSurface
false // disableEUFusion
),
std::exception);
}
@ -65,8 +87,8 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeC
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
//wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
//wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {10003, 10003, 1};
size_t workGroupSize[3];
@ -85,8 +107,8 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeC
}
TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) {
//wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
//wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 1;
size_t workGroup[3] = {6144, 1, 1};
size_t workGroupSize[3];
@ -116,7 +138,7 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca
}
TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) {
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 1;
size_t workGroup[3] = {6144, 1, 1};
size_t workGroupSize[3];
@ -140,7 +162,7 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLoc
}
TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) {
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {384, 96, 1};
size_t workGroupSize[3];
@ -167,7 +189,7 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca
TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {384, 96, 1};
@ -200,7 +222,7 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLoc
}
TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) {
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false);
uint32_t workDim = 3;
size_t workGroup[3] = {384, 384, 384};
size_t workGroupSize[3];
@ -236,7 +258,7 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca
}
TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) {
NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 3;
size_t workGroup[3] = {384, 384, 384};
size_t workGroupSize[3];
@ -282,7 +304,7 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledTh
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {384, 96, 1};
size_t workGroupSize[3];
@ -297,7 +319,7 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalled
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 1;
size_t workGroup[3] = {1024, 1, 1};
size_t workGroupSize[3];
@ -312,7 +334,7 @@ TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsC
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(true);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {256, 96, 1};
size_t workGroupSize[3];
@ -324,7 +346,7 @@ TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsC
}
TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) {
WorkSizeInfo wsInfo(256, true, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true);
WorkSizeInfo wsInfo(256, true, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -345,7 +367,7 @@ TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsC
}
TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) {
WorkSizeInfo wsInfo(256, false, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true);
WorkSizeInfo wsInfo(256, false, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -366,7 +388,7 @@ TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSize
}
TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) {
WorkSizeInfo wsInfo(256, false, 16, 0u, defaultHwInfo.get(), 32u, 0u, true, true);
WorkSizeInfo wsInfo(256, false, 16, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -387,7 +409,7 @@ TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgro
}
TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsComputedThenItHasMaxWorkgroupSize) {
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -401,7 +423,7 @@ TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsCompute
}
TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHigherThenXDimensionWhenLwsIsComputedThenItMimicsTiling) {
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -436,7 +458,7 @@ TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHighe
}
TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgoupSizeIsUsed) {
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -457,7 +479,7 @@ TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgo
}
TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenProperSizesAreReturned) {
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true);
WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -501,7 +523,7 @@ TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenPrope
}
TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) {
WorkSizeInfo wsInfo(256u, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0, false, false);
WorkSizeInfo wsInfo(256u, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0, false, false, false);
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -514,7 +536,7 @@ TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkg
}
TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) {
WorkSizeInfo wsInfo(256u, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0, false, false);
WorkSizeInfo wsInfo(256u, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0, false, false, false);
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -527,7 +549,7 @@ TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgr
}
TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedThenItMimicsGlobalWorkgroupSizes) {
WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, defaultHwInfo.get(), 56u, 65536u, true, true);
WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, defaultHwInfo.get(), 56u, 65536u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1, 1, 1};
size_t workGroupSize[3];
@ -548,7 +570,7 @@ TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedTh
}
TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) {
WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, defaultHwInfo.get(), 0u, 0u, true, true);
WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, defaultHwInfo.get(), 0u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {194, 234, 1};
size_t workGroupSize[3];
@ -576,7 +598,7 @@ TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalcul
}
TEST(localWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) {
WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, defaultHwInfo.get(), 56u, 0u, true, true);
WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, defaultHwInfo.get(), 56u, 0u, true, true, false);
uint32_t workDim = 2;
size_t workGroup[3] = {194, 234, 1};
@ -607,7 +629,7 @@ TEST(localWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculate
}
TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupComputed) {
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 6u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 6u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {2048, 272, 1};
@ -657,7 +679,7 @@ TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupCompu
TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8WhenGwsIs1024ThenLwsIsComputedAsMaxOptimalMultipliedBySimd) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
WorkSizeInfo wsInfo(1024, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false);
WorkSizeInfo wsInfo(1024, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {32, 32, 1};
@ -672,7 +694,7 @@ TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8When
TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf8HwThreads) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1024, 1024, 1};
@ -687,7 +709,7 @@ TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBei
TEST(localWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf16HwThreads) {
DebugManagerStateRestore dbgRestore;
DebugManager.flags.EnableComputeWorkSizeSquared.set(false);
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0u, false, false);
WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {1024, 1024, 1};
@ -800,14 +822,14 @@ HWTEST2_F(LocalWorkSizeTest, givenWorkSizeInfoIsCreatedWithHwInfoThenTestEuFusio
{
const bool fusedEuDispatchDisabled = true;
DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled);
WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false);
WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false);
EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
}
{
const bool fusedEuDispatchDisabled = false;
DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled);
WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false);
WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false);
EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
}
}
@ -827,7 +849,7 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarrier
}
TEST(localWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) {
WorkSizeInfo wsInfo(32, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(32, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false);
uint32_t workDim = 2;
size_t workGroup[3] = {32, 32, 1};
size_t workGroupSize[3];

View File

@ -56,7 +56,7 @@ struct WorkGroupSizeBase {
size_t workGroupSize[3];
auto maxWorkGroupSize = 256u;
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false);
computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dims);
} else {
if (dims == 1) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2021 Intel Corporation
* Copyright (C) 2018-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -798,7 +798,7 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK
size_t preferredWorkGroupSize[3];
auto maxWorkGroupSize = static_cast<uint32_t>(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize);
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false);
computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
} else
computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);
@ -817,7 +817,7 @@ TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIs
Kernel::SimpleKernelArgInfo kernelArgInfo;
if (DebugManager.flags.EnableComputeWorkSizeND.get()) {
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false);
WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false);
computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2);
} else
computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32);

View File

@ -310,6 +310,29 @@ TEST_F(KernelTests, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenC
EXPECT_EQ(paramValueSize, paramValueSizeRet);
}
// Creates a MockKernel from a KernelInfo whose descriptor sets
// requiresDisabledEUFusion, queries CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
// through getWorkGroupInfo, and expects success, a returned size equal to
// sizeof(size_t), and a value equal to kernelInfo.getMaxSimdSize().
TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
    KernelInfo kernelInfo = {};
    kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    MockKernel kernel(pProgram, kernelInfo, *pClDevice);

    cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE;
    // Zero-initialized: EXPECT_* failures are non-fatal, so the value comparison
    // below still runs when the query fails and must not read an indeterminate value.
    size_t paramValue = 0;
    size_t paramValueSize = sizeof(paramValue);
    size_t paramValueSizeRet = 0;

    retVal = kernel.getWorkGroupInfo(
        paramName,
        paramValueSize,
        &paramValue,
        &paramValueSizeRet);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(paramValueSize, paramValueSizeRet);
    EXPECT_EQ(kernelInfo.getMaxSimdSize(), paramValue);

    // NOTE(review): kernelInfo is a local that goes out of scope right after this
    // statement, so this reset looks redundant - confirm nothing observes it
    // during teardown before removing.
    kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
}
TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) {
size_t paramValueSizeRet = 0x1234u;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -23,7 +23,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetFal
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -39,7 +39,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEWeightedDispatchModeDisableSetTru
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -56,7 +56,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetFals
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -72,7 +72,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrue
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, *defaultHwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, *defaultHwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -90,7 +90,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -108,7 +108,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());

View File

@ -364,7 +364,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
@ -382,7 +382,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
StreamProperties streamProperties{};
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
streamProperties.frontEndState.setProperties(false, false, false, false, hwInfo);
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
parseCommands<FamilyType>(linearStream);
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());