From 2c1bfbb5b223a2b3aa6e0b65135eed9bdf465558 Mon Sep 17 00:00:00 2001 From: Krystian Chmielewski Date: Mon, 4 Apr 2022 09:11:39 +0000 Subject: [PATCH] Encode number barriers When programming number of barriers use BARRIER_SIZE enumeration. Resolves: NEO-6785 Signed-off-by: Krystian Chmielewski --- level_zero/core/source/kernel/kernel_imp.cpp | 2 +- opencl/source/kernel/kernel.cpp | 2 +- .../unit_test/helpers/hw_helper_tests.cpp | 6 ---- .../helpers/hw_helper_tests_pvc_and_later.cpp | 14 --------- shared/source/helpers/hw_helper.h | 3 -- shared/source/helpers/hw_helper_base.inl | 5 ---- shared/source/utilities/CMakeLists.txt | 3 +- shared/source/utilities/lookup_array.h | 29 +++++++++++++++++++ .../command_encoder_xe_hpc_core.cpp | 13 ++++++++- .../xe_hpc_core/hw_helper_xe_hpc_core.cpp | 15 ---------- .../command_encoder_xe_hpg_core.cpp | 7 ++++- ...t_encode_dispatch_kernel_pvc_and_later.cpp | 20 +++++++++---- 12 files changed, 66 insertions(+), 53 deletions(-) create mode 100644 shared/source/utilities/lookup_array.h diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index bda6af49ef..6eba7cfb8f 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -428,7 +428,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount dssCount * KB * hardwareInfo.capabilityTable.slmSize, hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), - hwHelper.getBarriersCountFromHasBarriers(barrierCount), + barrierCount, workDim, localWorkSize); *totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 22e5e25384..2bba0e10b3 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1076,7 +1076,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local dssCount * KB * hardwareInfo.capabilityTable.slmSize, hwHelper.alignSlmSize(slmTotalSize), static_cast(hwHelper.getMaxBarrierRegisterPerSlice()), - hwHelper.getBarriersCountFromHasBarriers(barrierCount), + barrierCount, workDim, localWorkSize); auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced(); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 54328a20af..5f467f6bf1 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -950,12 +950,6 @@ HWTEST_F(HwHelperTest, WhenIsBankOverrideRequiredIsCalledThenFalseIsReturned) { EXPECT_FALSE(hwHelper.isBankOverrideRequired(hardwareInfo)); } -HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) { - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); - EXPECT_EQ(1u, hwHelper.getBarriersCountFromHasBarriers(1u)); -} - HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); auto result = hwHelper.calculateAvailableThreadCount( diff --git a/opencl/test/unit_test/helpers/hw_helper_tests_pvc_and_later.cpp b/opencl/test/unit_test/helpers/hw_helper_tests_pvc_and_later.cpp index d53e810a02..36b698dbb6 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests_pvc_and_later.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests_pvc_and_later.cpp @@ -72,20 +72,6 @@ HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingCalculateAvailab } } -HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned, IsAtLeastXeHpcCore) { - auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); - - EXPECT_EQ(0u, hwHelper.getBarriersCountFromHasBarriers(0u)); - EXPECT_EQ(1u, hwHelper.getBarriersCountFromHasBarriers(1u)); - - EXPECT_EQ(2u, hwHelper.getBarriersCountFromHasBarriers(2u)); - EXPECT_EQ(4u, hwHelper.getBarriersCountFromHasBarriers(3u)); - EXPECT_EQ(8u, hwHelper.getBarriersCountFromHasBarriers(4u)); - EXPECT_EQ(16u, hwHelper.getBarriersCountFromHasBarriers(5u)); - EXPECT_EQ(24u, hwHelper.getBarriersCountFromHasBarriers(6u)); - EXPECT_EQ(32u, hwHelper.getBarriersCountFromHasBarriers(7u)); -} - HWTEST2_F(HwHelperTestPvcAndLater, givenHwHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) { auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index ea0942a8a7..10d11debe2 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -87,7 +87,6 @@ class HwHelper { virtual uint32_t getMetricsLibraryGenId() const = 0; virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0; virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0; - virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0; virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) = 0; virtual uint32_t alignSlmSize(uint32_t slmSize) = 0; @@ -286,8 +285,6 @@ class HwHelperHw : public HwHelper { bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override; - uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override; - uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override; uint32_t alignSlmSize(uint32_t slmSize) override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 21620492ed..fb1e851fb4 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -446,11 +446,6 @@ uint32_t HwHelperHw::computeSlmValues(const HardwareInfo &hwInfo, uin return value * !!slmSize; } -template -uint32_t HwHelperHw::getBarriersCountFromHasBarriers(uint32_t hasBarriers) { - return hasBarriers; -} - template inline bool HwHelperHw::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const { return false; diff --git a/shared/source/utilities/CMakeLists.txt b/shared/source/utilities/CMakeLists.txt index a051ab63b9..b3a9434675 100644 --- a/shared/source/utilities/CMakeLists.txt +++ b/shared/source/utilities/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2019-2021 Intel Corporation +# Copyright (C) 2019-2022 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -25,6 +25,7 @@ set(NEO_CORE_UTILITIES ${CMAKE_CURRENT_SOURCE_DIR}/io_functions.h ${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/logger.h + ${CMAKE_CURRENT_SOURCE_DIR}/lookup_array.h ${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h ${CMAKE_CURRENT_SOURCE_DIR}/numeric.h ${CMAKE_CURRENT_SOURCE_DIR}/perf_counter.h diff --git a/shared/source/utilities/lookup_array.h b/shared/source/utilities/lookup_array.h new file mode 100644 index 0000000000..ee99887065 --- /dev/null +++ b/shared/source/utilities/lookup_array.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/debug_helpers.h" + +#include +#include + +template +struct LookupArray { + using LookupMapArrayT = std::array, NumElements>; + constexpr LookupArray(const LookupMapArrayT &lookupArray) : lookupArray(lookupArray){}; + + constexpr ValueT lookUp(const KeyT &keyToFind) const { + for (auto &[key, value] : lookupArray) { + if (keyToFind == key) { + return value; + } + } + UNRECOVERABLE_IF(true); + } + + protected: + LookupMapArrayT lookupArray; +}; \ No newline at end of file diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index df3ddebde0..71a0c9221b 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -12,6 +12,7 @@ #include "shared/source/command_stream/stream_properties.h" #include "shared/source/helpers/constants.h" #include "shared/source/kernel/grf_config.h" +#include "shared/source/utilities/lookup_array.h" #include "shared/source/xe_hpc_core/hw_cmds_base.h" using Family = NEO::XE_HPC_COREFamily; @@ -169,7 +170,17 @@ template <> void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { - interfaceDescriptor.setNumberOfBarriers(static_cast(value)); + using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS; + static const LookupArray barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE}, + {1, BARRIERS::NUMBER_OF_BARRIERS_B1}, + {2, BARRIERS::NUMBER_OF_BARRIERS_B2}, + {4, BARRIERS::NUMBER_OF_BARRIERS_B4}, + {8, BARRIERS::NUMBER_OF_BARRIERS_B8}, + {16, BARRIERS::NUMBER_OF_BARRIERS_B16}, + {24, BARRIERS::NUMBER_OF_BARRIERS_B24}, + {32, BARRIERS::NUMBER_OF_BARRIERS_B32}}}); + BARRIERS numBarriers = barrierLookupArray.lookUp(value); + interfaceDescriptor.setNumberOfBarriers(numBarriers); } template <> diff --git a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp index c426815e74..a645a02a68 100644 --- a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp @@ -100,21 +100,6 @@ bool HwHelperHw::tilingAllowed(bool isSharedContext, bool isImage1d, boo return false; } -template <> -uint32_t HwHelperHw::getBarriersCountFromHasBarriers(uint32_t hasBarriers) { - static constexpr uint32_t possibleBarriersCounts[] = { - 0u, // 0 - 1u, // 1 - 2u, // 2 - 4u, // 3 - 8u, // 4 - 16u, // 5 - 24u, // 6 - 32u, // 7 - }; - return possibleBarriersCounts[hasBarriers]; -} - template <> uint32_t HwHelperHw::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) { diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index 2c4bfb3dee..125a1bfd6e 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -11,6 +11,7 @@ #include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/utilities/lookup_array.h" #include "shared/source/xe_hpg_core/hw_cmds_base.h" using Family = NEO::XE_HPG_COREFamily; @@ -102,7 +103,11 @@ void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCR template <> void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) { - interfaceDescriptor.setNumberOfBarriers(static_cast(value)); + using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS; + static const LookupArray barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE}, + {1, BARRIERS::NUMBER_OF_BARRIERS_B1}}}); + BARRIERS numBarriers = barrierLookupArray.lookUp(value); + interfaceDescriptor.setNumberOfBarriers(numBarriers); } template <> diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp index 8092d70188..444af8e4ba 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_pvc_and_later.cpp @@ -55,16 +55,26 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpcCore) { using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; + using BARRIERS = typename INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS; INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData; MockDevice device; auto hwInfo = device.getHardwareInfo(); - uint32_t barrierCounts[] = {0, 1, 2, 7}; - - for (auto barrierCount : barrierCounts) { + struct BarrierCountToBarrierNumEnum { + uint32_t barrierCount; + uint32_t numBarriersEncoding; + }; + constexpr BarrierCountToBarrierNumEnum barriers[8] = {{0, 0}, + {1, 1}, + {2, 2}, + {4, 3}, + {8, 4}, + {16, 5}, + {24, 6}, + {32, 7}}; + for (auto &[barrierCount, numBarriersEnum] : barriers) { EncodeDispatchKernel::programBarrierEnable(idd, barrierCount, hwInfo); - - EXPECT_EQ(barrierCount, idd.getNumberOfBarriers()); + EXPECT_EQ(numBarriersEnum, idd.getNumberOfBarriers()); } }