From da80d9906e4d9dec63739668980499980c586473 Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Sun, 1 Jan 2023 19:22:01 +0000 Subject: [PATCH] Refactor: don't use global GfxCoreHelper getter in shared files 5/n Related-To: NEO-6853 Signed-off-by: Kamil Kopryk --- level_zero/core/source/kernel/kernel_imp.cpp | 5 +- .../command_queue/cl_local_work_size.cpp | 4 +- .../command_queue/local_work_size_tests.cpp | 286 +++++++++--------- .../command_queue/work_group_size_tests.cpp | 13 +- .../driver_diagnostics_enqueue_tests.cpp | 8 +- shared/source/program/kernel_info.cpp | 15 +- shared/source/program/kernel_info.h | 7 +- 7 files changed, 175 insertions(+), 163 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 28472110a0..7bd86625f0 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -356,7 +356,6 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz if (NEO::DebugManager.flags.EnableComputeWorkSizeND.get()) { auto usesImages = getImmutableData()->getDescriptor().kernelAttributes.flags.usesImages; auto neoDevice = module->getDevice()->getNEODevice(); - const auto hwInfo = &neoDevice->getHardwareInfo(); const auto &deviceInfo = neoDevice->getDeviceInfo(); uint32_t numThreadsPerSubSlice = (uint32_t)deviceInfo.maxNumEUsPerSubSlice * deviceInfo.numThreadsPerEU; uint32_t localMemSize = (uint32_t)deviceInfo.localMemSize; @@ -367,7 +366,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz } NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(), - hwInfo, numThreadsPerSubSlice, localMemSize, + neoDevice->getRootDeviceEnvironment(), numThreadsPerSubSlice, localMemSize, usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion); NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim); } else { diff --git a/opencl/source/command_queue/cl_local_work_size.cpp b/opencl/source/command_queue/cl_local_work_size.cpp index f964e1df95..0a335bce49 100644 --- a/opencl/source/command_queue/cl_local_work_size.cpp +++ b/opencl/source/command_queue/cl_local_work_size.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -99,7 +99,7 @@ WorkSizeInfo createWorkSizeInfoFromDispatchInfo(const DispatchInfo &dispatchInfo kernelInfo.kernelDescriptor.kernelAttributes.usesBarriers(), static_cast(kernelInfo.getMaxSimdSize()), static_cast(dispatchInfo.getKernel()->getSlmTotalSize()), - &device.getHardwareInfo(), + device.getRootDeviceEnvironment(), numThreadsPerSubSlice, static_cast(device.getSharedDeviceInfo().localMemSize), false, diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp index 600e3163eb..662ade961c 100644 --- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,22 +18,30 @@ using namespace NEO; -TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectMinWorkGroupSizeIsSet) { +struct LocalWorkSizeTest : public ::testing::Test { + + MockExecutionEnvironment mockExecutionEnvironment{}; + RootDeviceEnvironment &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0].get(); +}; + +TEST_F(LocalWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectMinWorkGroupSizeIsSet) { + + auto &gfxCoreHelper = rootDeviceEnvironment.getHelper(); + uint32_t simdSize = 8u; uint32_t numThreadsPerSubS = 8u; - WorkSizeInfo wsInfo(256, // maxWorkGroupSize - 1u, // hasBariers - simdSize, // simdSize - 0u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - numThreadsPerSubS, // numThreadsPerSubS - 0u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - true // disableEUFusion + WorkSizeInfo wsInfo(256, // maxWorkGroupSize + 1u, // hasBariers + simdSize, // simdSize + 0u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + numThreadsPerSubS, // numThreadsPerSubS + 0u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + true // disableEUFusion ); - MockExecutionEnvironment mockExecutionEnvironment{}; - auto &gfxCoreHelper = mockExecutionEnvironment.rootDeviceEnvironments[0]->getHelper(); + bool fusedDispatchEnabled = gfxCoreHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); auto wgsMultiple = fusedDispatchEnabled ? 2 : 1; @@ -42,21 +50,21 @@ TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectM EXPECT_EQ(expectedMinWGS, wsInfo.minWorkGroupSize); } -TEST(localWorkSizeTest, GivenSlmLargerThanLocalThenWarningIsReturned) { +TEST_F(LocalWorkSizeTest, GivenSlmLargerThanLocalThenWarningIsReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.PrintDebugMessages.set(true); ::testing::internal::CaptureStderr(); - EXPECT_THROW(WorkSizeInfo wsInfo(256, // maxWorkGroupSize - 1u, // hasBariers - 8, // simdSize - 128u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 64u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion + EXPECT_THROW(WorkSizeInfo wsInfo(256, // maxWorkGroupSize + 1u, // hasBariers + 8, // simdSize + 128u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 64u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ), std::exception); @@ -64,69 +72,69 @@ TEST(localWorkSizeTest, GivenSlmLargerThanLocalThenWarningIsReturned) { EXPECT_EQ(std::string("Size of SLM (128) larger than available (64)\n"), output); } -TEST(localWorkSizeTest, GivenSlmSmallerThanLocalThenWarningIsNotReturned) { +TEST_F(LocalWorkSizeTest, GivenSlmSmallerThanLocalThenWarningIsNotReturned) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.PrintDebugMessages.set(true); ::testing::internal::CaptureStderr(); - WorkSizeInfo wsInfo(256, // maxWorkGroupSize - 1u, // hasBariers - 8, // simdSize - 64u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 128u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion + WorkSizeInfo wsInfo(256, // maxWorkGroupSize + 1u, // hasBariers + 8, // simdSize + 64u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 128u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ); std::string output = testing::internal::GetCapturedStderr(); EXPECT_EQ(std::string(""), output); } -TEST(localWorkSizeTest, whenSettingHasBarriersWithNoFusedDispatchThenMinWorkGroupSizeIsSetCorrectly) { +TEST_F(LocalWorkSizeTest, whenSettingHasBarriersWithNoFusedDispatchThenMinWorkGroupSizeIsSetCorrectly) { DebugManagerStateRestore dbgRestorer; DebugManager.flags.CFEFusedEUDispatch.set(0); - WorkSizeInfo wsInfo0(256, // maxWorkGroupSize - 0u, // hasBariers - 8, // simdSize - 0u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 128u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion + WorkSizeInfo wsInfo0(256, // maxWorkGroupSize + 0u, // hasBariers + 8, // simdSize + 0u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 128u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ); EXPECT_EQ(0u, wsInfo0.minWorkGroupSize); - WorkSizeInfo wsInfo1(256, // maxWorkGroupSize - 1u, // hasBariers - 8, // simdSize - 0u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 128u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion + WorkSizeInfo wsInfo1(256, // maxWorkGroupSize + 1u, // hasBariers + 8, // simdSize + 0u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 128u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ); EXPECT_NE(0u, wsInfo1.minWorkGroupSize); } -TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { - WorkSizeInfo wsInfo(256, // maxWorkGroupSize - 1u, // hasBariers - 8, // simdSize - 0u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 0u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion +TEST_F(LocalWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { + WorkSizeInfo wsInfo(256, // maxWorkGroupSize + 1u, // hasBariers + 8, // simdSize + 0u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 0u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ); uint32_t workDim = 3; @@ -147,27 +155,27 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8AndBarriersWhenComputeCal EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenSmallerLocalMemSizeThanSlmTotalSizeThenExceptionIsThrown) { - EXPECT_THROW(WorkSizeInfo wsInfo(256, // maxWorkGroupSize - 1u, // hasBariers - 8, // simdSize - 128u, // slmTotalSize - defaultHwInfo.get(), // hardwareInfo - 32u, // numThreadsPerSubSlice - 64u, // localMemorySize - false, // imgUsed - false, // yTiledSurface - false // disableEUFusion +TEST_F(LocalWorkSizeTest, givenSmallerLocalMemSizeThanSlmTotalSizeThenExceptionIsThrown) { + EXPECT_THROW(WorkSizeInfo wsInfo(256, // maxWorkGroupSize + 1u, // hasBariers + 8, // simdSize + 128u, // slmTotalSize + rootDeviceEnvironment, // rootDeviceEnvironment + 32u, // numThreadsPerSubSlice + 64u, // localMemorySize + false, // imgUsed + false, // yTiledSurface + false // disableEUFusion ), std::exception); } -TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { +TEST_F(LocalWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeCalledThenLocalGroupComputedCorrectly) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - // wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion - WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); + // wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, rootDeviceEnvironment, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion + WorkSizeInfo wsInfo(256, 0u, 8, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {10003, 10003, 1}; size_t workGroupSize[3]; @@ -185,9 +193,9 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8AndNoBarriersWhenComputeC EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { +TEST_F(LocalWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { // wsInfo maxWorkGroupSize, hasBariers, simdSize, slmTotalSize, hardwareInfo, numThreadsPerSubSlice, localMemorySize, imgUsed, yTiledSurface, disableEUFusion - WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 8, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; @@ -216,8 +224,8 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {6144, 1, 1}; size_t workGroupSize[3]; @@ -240,8 +248,8 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSimdEqual32WhenComputeCalledThenLoc EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { - WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { + WorkSizeInfo wsInfo(256, 0u, 8, 0u, rootDeviceEnvironment, 56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; @@ -265,10 +273,10 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { +TEST_F(LocalWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; @@ -300,8 +308,8 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSimdEqual32WhenComputeCalledThenLoc EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { - WorkSizeInfo wsInfo(256, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLocalGroupComputed) { + WorkSizeInfo wsInfo(256, 0u, 8, 0u, rootDeviceEnvironment, 56u, 0u, false, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; @@ -336,8 +344,8 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual8WhenComputeCalledThenLoca EXPECT_EQ(workGroupSize[2], 3u); } -TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { - NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLocalGroupComputed) { + NEO::WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 3; size_t workGroup[3] = {384, 384, 384}; size_t workGroupSize[3]; @@ -379,11 +387,11 @@ TEST(localWorkSizeTest, given3DimWorkGroupAndSimdEqual32WhenComputeCalledThenLoc EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledThenLocalGroupComputed) { +TEST_F(LocalWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledThenLocalGroupComputed) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {384, 96, 1}; size_t workGroupSize[3]; @@ -394,11 +402,11 @@ TEST(localWorkSizeTest, given2DimWorkGroupAndSquaredAlgorithmWhenComputeCalledTh EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalledThenSquaredAlgorithmIsNotExecuted) { +TEST_F(LocalWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalledThenSquaredAlgorithmIsNotExecuted) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 1; size_t workGroup[3] = {1024, 1, 1}; size_t workGroupSize[3]; @@ -409,11 +417,11 @@ TEST(localWorkSizeTest, given1DimWorkGroupAndSquaredAlgorithmOnWhenComputeCalled EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsComputedThenSquaredAlgorithmIsNotExecuted) { +TEST_F(LocalWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsComputedThenSquaredAlgorithmIsNotExecuted) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(true); - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, false, false); + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, true, false, false); uint32_t workDim = 2; size_t workGroup[3] = {256, 96, 1}; size_t workGroupSize[3]; @@ -424,8 +432,8 @@ TEST(localWorkSizeTest, given2DdispatchWithImagesAndSquaredAlgorithmOnWhenLwsIsC EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { - WorkSizeInfo wsInfo(256, true, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { + WorkSizeInfo wsInfo(256, true, 32, 0u, rootDeviceEnvironment, 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -445,8 +453,8 @@ TEST(localWorkSizeTest, givenKernelWithTileYImagesAndBarrierWhenWorkgroupSizeIsC EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { - WorkSizeInfo wsInfo(256, false, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { + WorkSizeInfo wsInfo(256, false, 32, 0u, rootDeviceEnvironment, 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -466,8 +474,8 @@ TEST(localWorkSizeTest, givenKernelWithTileYImagesAndNoBarriersWhenWorkgroupSize EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { - WorkSizeInfo wsInfo(256, false, 16, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgroupSizeIsComputedThenItMimicsTilingPattern) { + WorkSizeInfo wsInfo(256, false, 16, 0u, rootDeviceEnvironment, 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -487,8 +495,8 @@ TEST(localWorkSizeTest, givenSimd16KernelWithTileYImagesAndNoBarriersWhenWorkgro EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsComputedThenItHasMaxWorkgroupSize) { - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsComputedThenItHasMaxWorkgroupSize) { + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -501,8 +509,8 @@ TEST(localWorkSizeTest, givenKernelWithTwoDimensionalGlobalSizesWhenLwsIsCompute EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHigherThenXDimensionWhenLwsIsComputedThenItMimicsTiling) { - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHigherThenXDimensionWhenLwsIsComputedThenItMimicsTiling) { + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -536,8 +544,8 @@ TEST(localWorkSizeTest, givenKernelWithBarriersAndTiledImagesWithYdimensionHighe EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgoupSizeIsUsed) { - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgoupSizeIsUsed) { + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -557,8 +565,8 @@ TEST(localWorkSizeTest, givenHighOneDimensionalGwsWhenLwsIsComputedThenMaxWorkgo EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenProperSizesAreReturned) { - WorkSizeInfo wsInfo(256, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenProperSizesAreReturned) { + WorkSizeInfo wsInfo(256, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -601,8 +609,8 @@ TEST(localWorkSizeTest, givenVeriousGwsSizesWithImagesWhenLwsIsComputedThenPrope EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { - WorkSizeInfo wsInfo(256u, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0, false, false, false); +TEST_F(LocalWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { + WorkSizeInfo wsInfo(256u, 0u, 16, 0u, rootDeviceEnvironment, 56u, 0, false, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -614,8 +622,8 @@ TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize16WhenLwsIsComputedThenMaxWorkg EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { - WorkSizeInfo wsInfo(256u, 0u, 8, 0u, defaultHwInfo.get(), 32u, 0, false, false, false); +TEST_F(LocalWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgroupSizeIsChoosen) { + WorkSizeInfo wsInfo(256u, 0u, 8, 0u, rootDeviceEnvironment, 32u, 0, false, false, false); size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -627,8 +635,8 @@ TEST(localWorkSizeTest, givenHigh1DGwsAndSimdSize8WhenLwsIsComputedThenMaxWorkgr EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedThenItMimicsGlobalWorkgroupSizes) { - WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, defaultHwInfo.get(), 56u, 65536u, true, true, false); +TEST_F(LocalWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedThenItMimicsGlobalWorkgroupSizes) { + WorkSizeInfo wsInfo(256u, 1u, 32, 4096u, rootDeviceEnvironment, 56u, 65536u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {1, 1, 1}; size_t workGroupSize[3]; @@ -648,8 +656,8 @@ TEST(localWorkSizeTest, givenKernelUtilizingImagesAndSlmWhenLwsIsBeingComputedTh EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { - WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, defaultHwInfo.get(), 0u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { + WorkSizeInfo wsInfo(256u, 0u, 32u, 0u, rootDeviceEnvironment, 0u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; size_t workGroupSize[3]; @@ -676,8 +684,8 @@ TEST(localWorkSizeTest, GivenUseStrictRatioWhenLwsIsBeingComputedThenWgsIsCalcul EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { - WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, defaultHwInfo.get(), 56u, 0u, true, true, false); +TEST_F(LocalWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculatedCorrectly) { + WorkSizeInfo wsInfo(256u, 1u, 32u, 0u, rootDeviceEnvironment, 56u, 0u, true, true, false); uint32_t workDim = 2; size_t workGroup[3] = {194, 234, 1}; @@ -709,8 +717,8 @@ TEST(localWorkSizeTest, GivenUseBarriersWhenLwsIsBeingComputedThenWgsIsCalculate EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupComputed) { - WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 6u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupComputed) { + WorkSizeInfo wsInfo(256, 0u, 16, 0u, rootDeviceEnvironment, 6u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {2048, 272, 1}; @@ -757,10 +765,10 @@ TEST(localWorkSizeTest, given2DimWorkWhenComputeSquaredCalledThenLocalGroupCompu EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8WhenGwsIs1024ThenLwsIsComputedAsMaxOptimalMultipliedBySimd) { +TEST_F(LocalWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8WhenGwsIs1024ThenLwsIsComputedAsMaxOptimalMultipliedBySimd) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - WorkSizeInfo wsInfo(1024, 0u, 8, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); + WorkSizeInfo wsInfo(1024, 0u, 8, 0u, rootDeviceEnvironment, 56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; @@ -772,10 +780,10 @@ TEST(localWorkSizeTest, givenDeviceSupportingLws1024AndKernelCompiledInSimd8When EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf8HwThreads) { +TEST_F(LocalWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf8HwThreads) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 16, 0u, rootDeviceEnvironment, 36u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; @@ -787,10 +795,10 @@ TEST(localWorkSizeTest, givenDeviceWith36ThreadsPerSubsliceWhenSimd16KernelIsBei EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf16HwThreads) { +TEST_F(LocalWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBeingSubmittedThenWorkgroupContainsOf16HwThreads) { DebugManagerStateRestore dbgRestore; DebugManager.flags.EnableComputeWorkSizeSquared.set(false); - WorkSizeInfo wsInfo(256, 0u, 16, 0u, defaultHwInfo.get(), 56u, 0u, false, false, false); + WorkSizeInfo wsInfo(256, 0u, 16, 0u, rootDeviceEnvironment, 56u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {1024, 1024, 1}; @@ -802,7 +810,7 @@ TEST(localWorkSizeTest, givenDeviceWith56ThreadsPerSubsliceWhenSimd16KernelIsBei EXPECT_EQ(workGroupSize[2], 1u); } -TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorrectNumberOfThreads) { +TEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorrectNumberOfThreads) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; @@ -820,8 +828,6 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorre EXPECT_EQ(workSizeInfo.numThreadsPerSubSlice, threadsPerEu * euPerSubSlice); } -using LocalWorkSizeTest = ::testing::Test; - HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenWorkgroupSizeIsCorrect, IsAtMostGen11) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); @@ -903,19 +909,19 @@ HWTEST2_F(LocalWorkSizeTest, givenWorkSizeInfoIsCreatedWithHwInfoThenTestEuFusio { const bool fusedEuDispatchDisabled = true; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); - WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); + WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, rootDeviceEnvironment, 36u, 0u, false, false, false); EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } { const bool fusedEuDispatchDisabled = false; DebugManager.flags.CFEFusedEUDispatch.set(fusedEuDispatchDisabled); - WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, defaultHwInfo.get(), 36u, 0u, false, false, false); + WorkSizeInfo workSizeInfo(512, 1u, 16, 0u, rootDeviceEnvironment, 36u, 0u, false, false, false); EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } } -TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarriersIsCorrectlySet) { +TEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarriersIsCorrectlySet) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); DispatchInfo dispatchInfo; @@ -929,8 +935,8 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenHasBarrier EXPECT_TRUE(createWorkSizeInfoFromDispatchInfo(dispatchInfo).hasBarriers); } -TEST(localWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) { - WorkSizeInfo wsInfo(32, 0u, 32, 0u, defaultHwInfo.get(), 32u, 0u, false, false, false); +TEST_F(LocalWorkSizeTest, givenMaxWorkgroupSizeEqualToSimdSizeWhenLwsIsCalculatedThenItIsDownsizedToMaxWorkgroupSize) { + WorkSizeInfo wsInfo(32, 0u, 32, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); uint32_t workDim = 2; size_t workGroup[3] = {32, 32, 1}; size_t workGroupSize[3]; diff --git a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp index d7cda32b27..4a7f930dbf 100644 --- a/opencl/test/unit_test/command_queue/work_group_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/work_group_size_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #include "shared/source/helpers/local_work_size.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/source/command_queue/gpgpu_walker.h" @@ -56,7 +57,9 @@ struct WorkGroupSizeBase { size_t workGroupSize[3]; auto maxWorkGroupSize = 256u; if (DebugManager.flags.EnableComputeWorkSizeND.get()) { - WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); + MockExecutionEnvironment mockExecutionEnvironment{}; + RootDeviceEnvironment &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0].get(); + WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, simdSize, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, workGroupSize, workItems, dims); } else { if (dims == 1) { @@ -75,12 +78,12 @@ struct WorkGroupSizeBase { auto yRemainder = workItems[1] % workGroupSize[1]; auto zRemainder = workItems[2] % workGroupSize[2]; - //No remainders + // No remainders EXPECT_EQ(0u, xRemainder); EXPECT_EQ(0u, yRemainder); EXPECT_EQ(0u, zRemainder); - //Now setup GPGPU Walker + // Now setup GPGPU Walker typedef typename FamilyType::GPGPU_WALKER GPGPU_WALKER; GPGPU_WALKER pCmd = FamilyType::cmdInitGpgpuWalker; @@ -93,7 +96,7 @@ struct WorkGroupSizeBase { GpgpuWalkerHelper::setGpgpuWalkerThreadData(&pCmd, kd, globalOffsets, workGroupsStart, workGroupsNum, workGroupSize, simdSize, dims, true, false, 0u); - //And check if it is programmed correctly + // And check if it is programmed correctly auto numWorkItems = computeWalkerWorkItems(pCmd); EXPECT_EQ(totalWorkItems, numWorkItems); diff --git a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp index 092a41680e..8c88a4543b 100644 --- a/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp +++ b/opencl/test/unit_test/context/driver_diagnostics_enqueue_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -798,7 +798,8 @@ TEST_F(PerformanceHintEnqueueKernelPrintfTest, GivenKernelWithPrintfWhenEnqueueK size_t preferredWorkGroupSize[3]; auto maxWorkGroupSize = static_cast(pPlatform->getClDevice(0)->getSharedDeviceInfo().maxWorkGroupSize); if (DebugManager.flags.EnableComputeWorkSizeND.get()) { - WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); + auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment(); + WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2); } else computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32); @@ -817,7 +818,8 @@ TEST_F(PerformanceHintEnqueueTest, GivenKernelWithCoherentPtrWhenEnqueueKernelIs Kernel::SimpleKernelArgInfo kernelArgInfo; if (DebugManager.flags.EnableComputeWorkSizeND.get()) { - WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, ::defaultHwInfo.get(), 32u, 0u, false, false, false); + auto &rootDeviceEnvironment = pPlatform->getClDevice(0)->getRootDeviceEnvironment(); + WorkSizeInfo wsInfo(maxWorkGroupSize, 0u, 32u, 0u, rootDeviceEnvironment, 32u, 0u, false, false, false); computeWorkgroupSizeND(wsInfo, preferredWorkGroupSize, globalWorkGroupSize, 2); } else computeWorkgroupSize2D(maxWorkGroupSize, preferredWorkGroupSize, globalWorkGroupSize, 32); diff --git a/shared/source/program/kernel_info.cpp b/shared/source/program/kernel_info.cpp index 787c95e9a6..5342fef45f 100644 --- a/shared/source/program/kernel_info.cpp +++ b/shared/source/program/kernel_info.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -10,6 +10,7 @@ #include "shared/source/device/device.h" #include "shared/source/device_binary_format/elf/zebin_elf.h" #include "shared/source/device_binary_format/patchtokens_decoder.h" +#include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/hw_helper.h" @@ -32,18 +33,18 @@ struct KernelArgumentType { uint64_t argTypeQualifierValue; }; -WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion) { +WorkSizeInfo::WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const RootDeviceEnvironment &rootDeviceEnvironemnt, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion) { this->maxWorkGroupSize = maxWorkGroupSize; this->hasBarriers = hasBarriers; this->simdSize = simdSize; this->slmTotalSize = slmTotalSize; - this->coreFamily = hwInfo->platform.eRenderCoreFamily; + this->coreFamily = rootDeviceEnvironemnt.getHardwareInfo()->platform.eRenderCoreFamily; this->numThreadsPerSubSlice = numThreadsPerSubSlice; this->localMemSize = localMemSize; this->imgUsed = imgUsed; this->yTiledSurfaces = yTiledSurface; - setMinWorkGroupSize(hwInfo, disableEUFusion); + setMinWorkGroupSize(rootDeviceEnvironemnt, disableEUFusion); } void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) { @@ -56,7 +57,7 @@ void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) { } } -void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableEUFusion) { +void WorkSizeInfo::setMinWorkGroupSize(const RootDeviceEnvironment &rootDeviceEnvironemnt, bool disableEUFusion) { minWorkGroupSize = 0; if (hasBarriers) { uint32_t maxBarriersPerHSlice = (coreFamily >= IGFX_GEN9_CORE) ? 32 : 16; @@ -70,8 +71,8 @@ void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableE minWorkGroupSize = std::max(maxWorkGroupSize / ((localMemSize / slmTotalSize)), minWorkGroupSize); } - const auto &gfxCoreHelper = GfxCoreHelper::get(hwInfo->platform.eRenderCoreFamily); - if (gfxCoreHelper.isFusedEuDispatchEnabled(*hwInfo, disableEUFusion)) { + const auto &gfxCoreHelper = rootDeviceEnvironemnt.getHelper(); + if (gfxCoreHelper.isFusedEuDispatchEnabled(*rootDeviceEnvironemnt.getHardwareInfo(), disableEUFusion)) { minWorkGroupSize *= 2; } } diff --git a/shared/source/program/kernel_info.h b/shared/source/program/kernel_info.h index f4cb8d21e1..922cba150e 100644 --- a/shared/source/program/kernel_info.h +++ b/shared/source/program/kernel_info.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -28,6 +28,7 @@ class DispatchInfo; struct KernelArgumentType; class GraphicsAllocation; class MemoryManager; +struct RootDeviceEnvironemnt; static const float YTilingRatioValue = 1.3862943611198906188344642429164f; @@ -46,10 +47,10 @@ struct WorkSizeInfo { bool useStrictRatio = false; float targetRatio = 0; - WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const HardwareInfo *hwInfo, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion); + WorkSizeInfo(uint32_t maxWorkGroupSize, bool hasBarriers, uint32_t simdSize, uint32_t slmTotalSize, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t numThreadsPerSubSlice, uint32_t localMemSize, bool imgUsed, bool yTiledSurface, bool disableEUFusion); void setIfUseImg(const KernelInfo &kernelInfo); - void setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableEUFusion); + void setMinWorkGroupSize(const RootDeviceEnvironment &rootDeviceEnvironemnt, bool disableEUFusion); void checkRatio(const size_t workItems[3]); };