Encode number of barriers

When programming the number of barriers, use the BARRIER_SIZE enumeration.
Resolves: NEO-6785

Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
Krystian Chmielewski
2022-04-04 09:11:39 +00:00
committed by Compute-Runtime-Automation
parent 81739c0265
commit 2c1bfbb5b2
12 changed files with 66 additions and 53 deletions

View File

@@ -428,7 +428,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize),
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
barrierCount,
workDim,
localWorkSize);
*totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);

View File

@@ -1076,7 +1076,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
hwHelper.alignSlmSize(slmTotalSize),
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
barrierCount,
workDim,
localWorkSize);
auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced();

View File

@@ -950,12 +950,6 @@ HWTEST_F(HwHelperTest, WhenIsBankOverrideRequiredIsCalledThenFalseIsReturned) {
EXPECT_FALSE(hwHelper.isBankOverrideRequired(hardwareInfo));
}
// Checks that getBarriersCountFromHasBarriers returns its argument unchanged for the inputs 0 and 1.
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) {
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    // Each listed input is expected to come back as-is.
    constexpr uint32_t passThroughInputs[] = {0u, 1u};
    for (const uint32_t input : passThroughInputs) {
        EXPECT_EQ(input, hwHelper.getBarriersCountFromHasBarriers(input));
    }
}
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) {
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
auto result = hwHelper.calculateAvailableThreadCount(

View File

@@ -72,20 +72,6 @@ HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingCalculateAvailab
}
}
// Checks the mapping from the hasBarriers input (0..7) to the barrier count returned by
// getBarriersCountFromHasBarriers on XeHPC and later cores.
HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    struct Expectation {
        uint32_t hasBarriers;
        uint32_t expectedCount;
    };
    constexpr Expectation expectations[] = {{0u, 0u},
                                            {1u, 1u},
                                            {2u, 2u},
                                            {3u, 4u},
                                            {4u, 8u},
                                            {5u, 16u},
                                            {6u, 24u},
                                            {7u, 32u}};

    for (const auto &expectation : expectations) {
        EXPECT_EQ(expectation.expectedCount, hwHelper.getBarriersCountFromHasBarriers(expectation.hasBarriers));
    }
}
HWTEST2_F(HwHelperTestPvcAndLater, givenHwHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

View File

@@ -87,7 +87,6 @@ class HwHelper {
virtual uint32_t getMetricsLibraryGenId() const = 0;
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
@@ -286,8 +285,6 @@ class HwHelperHw : public HwHelper {
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
uint32_t alignSlmSize(uint32_t slmSize) override;

View File

@@ -446,11 +446,6 @@ uint32_t HwHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uin
return value * !!slmSize;
}
// Generic implementation: the barrier count is the hasBarriers value itself, returned unchanged.
template <typename GfxFamily>
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
    const uint32_t barriersCount = hasBarriers;
    return barriersCount;
}
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
return false;

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2019-2021 Intel Corporation
# Copyright (C) 2019-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -25,6 +25,7 @@ set(NEO_CORE_UTILITIES
${CMAKE_CURRENT_SOURCE_DIR}/io_functions.h
${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp
${CMAKE_CURRENT_SOURCE_DIR}/logger.h
${CMAKE_CURRENT_SOURCE_DIR}/lookup_array.h
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h
${CMAKE_CURRENT_SOURCE_DIR}/numeric.h
${CMAKE_CURRENT_SOURCE_DIR}/perf_counter.h

View File

@@ -0,0 +1,29 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/debug_helpers.h"
#include <array>
#include <utility>
// Fixed-size table of (key, value) pairs that is searched linearly.
//   KeyT        - key type, compared with operator== against the searched key
//   ValueT      - mapped type, returned by value
//   NumElements - number of pairs stored in the table
template <typename KeyT, typename ValueT, size_t NumElements>
struct LookupArray {
    using LookupMapArrayT = std::array<std::pair<KeyT, ValueT>, NumElements>;

    // Stores a copy of the supplied pairs.
    constexpr LookupArray(const LookupMapArrayT &lookupArray) : lookupArray(lookupArray) {}

    // Returns the value of the first stored pair whose key equals keyToFind.
    // When no pair matches, execution reaches UNRECOVERABLE_IF(true).
    constexpr ValueT lookUp(const KeyT &keyToFind) const {
        for (size_t index = 0; index < NumElements; ++index) {
            const auto &entry = lookupArray[index];
            if (keyToFind == entry.first) {
                return entry.second;
            }
        }
        UNRECOVERABLE_IF(true);
    }

  protected:
    LookupMapArrayT lookupArray;
};

View File

@@ -12,6 +12,7 @@
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/source/utilities/lookup_array.h"
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
using Family = NEO::XE_HPC_COREFamily;
@@ -169,7 +170,17 @@ template <>
// Programs the NUMBER_OF_BARRIERS field of the given interface descriptor.
// `value` is used as a key into the table below (0, 1, 2, 4, 8, 16, 24 or 32) and the matching
// NUMBER_OF_BARRIERS enumerator is written to `interfaceDescriptor`. `hwInfo` is not read in this body.
// NOTE(review): a `value` that is not a key of the table reaches UNRECOVERABLE_IF(true) inside
// LookupArray::lookUp — presumably fatal; confirm against debug_helpers.h.
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
uint32_t value,
const HardwareInfo &hwInfo) {
// NOTE(review): this view carries no +/- markers; the hunk header (-169,7 +170,17) suggests the raw
// cast on the next line is the statement removed by the commit and the lookup below replaces it — confirm.
interfaceDescriptor.setNumberOfBarriers(static_cast<INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS>(value));
using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
// Barrier count -> NUMBER_OF_BARRIERS enumerator; all 8 slots of the table are initialized.
static const LookupArray<uint32_t, BARRIERS, 8> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
{1, BARRIERS::NUMBER_OF_BARRIERS_B1},
{2, BARRIERS::NUMBER_OF_BARRIERS_B2},
{4, BARRIERS::NUMBER_OF_BARRIERS_B4},
{8, BARRIERS::NUMBER_OF_BARRIERS_B8},
{16, BARRIERS::NUMBER_OF_BARRIERS_B16},
{24, BARRIERS::NUMBER_OF_BARRIERS_B24},
{32, BARRIERS::NUMBER_OF_BARRIERS_B32}}});
// Translate the count to its enumerator, then write it into the descriptor.
BARRIERS numBarriers = barrierLookupArray.lookUp(value);
interfaceDescriptor.setNumberOfBarriers(numBarriers);
}
template <>

View File

@@ -100,21 +100,6 @@ bool HwHelperHw<Family>::tilingAllowed(bool isSharedContext, bool isImage1d, boo
return false;
}
// Family-specific translation of the hasBarriers input into a barrier count.
// hasBarriers is used directly as an index into an 8-entry table; no range check is performed,
// so only the inputs 0..7 are covered by the table.
template <>
uint32_t HwHelperHw<Family>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
    // Index (hasBarriers) -> barrier count.
    static constexpr uint32_t barriersCountByInput[] = {0u, 1u, 2u, 4u, 8u, 16u, 24u, 32u};

    const uint32_t barriersCount = barriersCountByInput[hasBarriers];
    return barriersCount;
}
template <>
uint32_t HwHelperHw<Family>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
uint32_t threadsPerEu) {

View File

@@ -11,6 +11,7 @@
#include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/utilities/lookup_array.h"
#include "shared/source/xe_hpg_core/hw_cmds_base.h"
using Family = NEO::XE_HPG_COREFamily;
@@ -102,7 +103,11 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
// Programs the NUMBER_OF_BARRIERS field of the given interface descriptor.
// `value` is used as a key into the table below (0 or 1) and the matching NUMBER_OF_BARRIERS
// enumerator is written to `interfaceDescriptor`. `hwInfo` is not read in this body.
// NOTE(review): a `value` that is not a key of the table reaches UNRECOVERABLE_IF(true) inside
// LookupArray::lookUp — presumably fatal; confirm against debug_helpers.h.
template <>
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
    // NOTE(review): this view carries no +/- markers; the hunk header (-102,7 +103,11) suggests the raw
    // cast on the next line is the statement removed by the commit and the lookup below replaces it — confirm.
    interfaceDescriptor.setNumberOfBarriers(static_cast<INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS>(value));
    using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
    // The table size equals the number of initializers. A larger declared size would leave the
    // remaining slots value-initialized, i.e. extra entries that repeat key 0.
    static const LookupArray<uint32_t, BARRIERS, 2> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
                                                                         {1, BARRIERS::NUMBER_OF_BARRIERS_B1}}});
    // Translate the count to its enumerator, then write it into the descriptor.
    BARRIERS numBarriers = barrierLookupArray.lookUp(value);
    interfaceDescriptor.setNumberOfBarriers(numBarriers);
}
template <>

View File

@@ -55,16 +55,26 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari
// Checks that EncodeDispatchKernel::programBarrierEnable stores the expected value in the
// interface descriptor's number-of-barriers field for each tested barrier count.
// NOTE(review): this view interleaves removed and added diff lines without +/- markers (hunk
// header -55,16 +55,26), so the text below is not a single compilable version of the test.
// Presumably the `barrierCounts` array, the loop over it and EXPECT_EQ(barrierCount, ...)
// belong to the pre-change version — confirm against the repository.
HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpcCore) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using BARRIERS = typename INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
// Start from the family's initial interface descriptor value.
INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
MockDevice device;
auto hwInfo = device.getHardwareInfo();
uint32_t barrierCounts[] = {0, 1, 2, 7};
for (auto barrierCount : barrierCounts) {
// Pairs a barrier count passed to programBarrierEnable with the value expected from
// idd.getNumberOfBarriers() afterwards.
struct BarrierCountToBarrierNumEnum {
uint32_t barrierCount;
uint32_t numBarriersEncoding;
};
constexpr BarrierCountToBarrierNumEnum barriers[8] = {{0, 0},
{1, 1},
{2, 2},
{4, 3},
{8, 4},
{16, 5},
{24, 6},
{32, 7}};
for (auto &[barrierCount, numBarriersEnum] : barriers) {
EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, hwInfo);
EXPECT_EQ(barrierCount, idd.getNumberOfBarriers());
// The descriptor is expected to hold the encoding from the table, not the raw count.
EXPECT_EQ(numBarriersEnum, idd.getNumberOfBarriers());
}
}