mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-29 00:58:39 +08:00
Encode number barriers
When programming number of barriers use BARRIER_SIZE enumeration. Resolves: NEO-6785 Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
81739c0265
commit
2c1bfbb5b2
@@ -428,7 +428,7 @@ ze_result_t KernelImp::suggestMaxCooperativeGroupCount(uint32_t *totalGroupCount
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmArgsTotalSize + descriptor.kernelAttributes.slmInlineSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||
barrierCount,
|
||||
workDim,
|
||||
localWorkSize);
|
||||
*totalGroupCount = hwHelper.adjustMaxWorkGroupCount(*totalGroupCount, engineGroupType, hardwareInfo, isEngineInstanced);
|
||||
|
||||
@@ -1076,7 +1076,7 @@ uint32_t Kernel::getMaxWorkGroupCount(const cl_uint workDim, const size_t *local
|
||||
dssCount * KB * hardwareInfo.capabilityTable.slmSize,
|
||||
hwHelper.alignSlmSize(slmTotalSize),
|
||||
static_cast<uint32_t>(hwHelper.getMaxBarrierRegisterPerSlice()),
|
||||
hwHelper.getBarriersCountFromHasBarriers(barrierCount),
|
||||
barrierCount,
|
||||
workDim,
|
||||
localWorkSize);
|
||||
auto isEngineInstanced = commandQueue->getGpgpuCommandStreamReceiver().getOsContext().isEngineInstanced();
|
||||
|
||||
@@ -950,12 +950,6 @@ HWTEST_F(HwHelperTest, WhenIsBankOverrideRequiredIsCalledThenFalseIsReturned) {
|
||||
EXPECT_FALSE(hwHelper.isBankOverrideRequired(hardwareInfo));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned) {
    // For the inputs 0 and 1 the helper must return exactly the value it was given.
    auto &helper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
    const uint32_t passThroughInputs[] = {0u, 1u};
    for (const uint32_t hasBarriers : passThroughInputs) {
        EXPECT_EQ(hasBarriers, helper.getBarriersCountFromHasBarriers(hasBarriers));
    }
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, GivenVariousValuesWhenCallingCalculateAvailableThreadCountThenCorrectValueIsReturned) {
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
auto result = hwHelper.calculateAvailableThreadCount(
|
||||
|
||||
@@ -72,20 +72,6 @@ HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingCalculateAvailab
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(HwHelperTestPvcAndLater, GivenVariousValuesWhenCallingGetBarriersCountFromHasBarrierThenCorrectValueIsReturned, IsAtLeastXeHpcCore) {
    auto &helper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);

    // expectedCounts[i] is the barrier count the helper must report for input i (i = 0..7).
    const uint32_t expectedCounts[] = {0u, 1u, 2u, 4u, 8u, 16u, 24u, 32u};
    uint32_t hasBarriers = 0u;
    for (const uint32_t expectedCount : expectedCounts) {
        EXPECT_EQ(expectedCount, helper.getBarriersCountFromHasBarriers(hasBarriers));
        ++hasBarriers;
    }
}
|
||||
|
||||
HWTEST2_F(HwHelperTestPvcAndLater, givenHwHelperWhenCheckIsUpdateTaskCountFromWaitSupportedThenReturnsTrue, IsAtLeastXeHpcCore) {
|
||||
auto &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily);
|
||||
|
||||
|
||||
@@ -87,7 +87,6 @@ class HwHelper {
|
||||
virtual uint32_t getMetricsLibraryGenId() const = 0;
|
||||
virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0;
|
||||
virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0;
|
||||
virtual uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) = 0;
|
||||
@@ -286,8 +285,6 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) override;
|
||||
|
||||
uint32_t getBarriersCountFromHasBarriers(uint32_t hasBarriers) override;
|
||||
|
||||
uint32_t calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount, uint32_t threadsPerEu) override;
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) override;
|
||||
|
||||
@@ -446,11 +446,6 @@ uint32_t HwHelperHw<GfxFamily>::computeSlmValues(const HardwareInfo &hwInfo, uin
|
||||
return value * !!slmSize;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t HwHelperHw<GfxFamily>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
|
||||
return hasBarriers;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const {
|
||||
return false;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# Copyright (C) 2019-2021 Intel Corporation
|
||||
# Copyright (C) 2019-2022 Intel Corporation
|
||||
#
|
||||
# SPDX-License-Identifier: MIT
|
||||
#
|
||||
@@ -25,6 +25,7 @@ set(NEO_CORE_UTILITIES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/io_functions.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/logger.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/logger.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/lookup_array.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/metrics_library.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/numeric.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/perf_counter.h
|
||||
|
||||
29
shared/source/utilities/lookup_array.h
Normal file
29
shared/source/utilities/lookup_array.h
Normal file
@@ -0,0 +1,29 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/debug_helpers.h"
|
||||
|
||||
#include <array>
|
||||
#include <utility>
|
||||
|
||||
template <typename KeyT, typename ValueT, size_t NumElements>
|
||||
struct LookupArray {
|
||||
using LookupMapArrayT = std::array<std::pair<KeyT, ValueT>, NumElements>;
|
||||
constexpr LookupArray(const LookupMapArrayT &lookupArray) : lookupArray(lookupArray){};
|
||||
|
||||
constexpr ValueT lookUp(const KeyT &keyToFind) const {
|
||||
for (auto &[key, value] : lookupArray) {
|
||||
if (keyToFind == key) {
|
||||
return value;
|
||||
}
|
||||
}
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
protected:
|
||||
LookupMapArrayT lookupArray;
|
||||
};
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/kernel/grf_config.h"
|
||||
#include "shared/source/utilities/lookup_array.h"
|
||||
#include "shared/source/xe_hpc_core/hw_cmds_base.h"
|
||||
|
||||
using Family = NEO::XE_HPC_COREFamily;
|
||||
@@ -169,7 +170,17 @@ template <>
|
||||
// Programs the NUMBER_OF_BARRIERS field of the interface descriptor from a barrier count.
//   interfaceDescriptor - descriptor whose field is written
//   value               - barrier count; must be one of the keys in the table below
//                         (0, 1, 2, 4, 8, 16, 24, 32), any other count ends in
//                         LookupArray::lookUp's UNRECOVERABLE_IF path
//   hwInfo              - not used by this specialization
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor,
                                                        uint32_t value,
                                                        const HardwareInfo &hwInfo) {
    using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
    static const LookupArray<uint32_t, BARRIERS, 8> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
                                                                         {1, BARRIERS::NUMBER_OF_BARRIERS_B1},
                                                                         {2, BARRIERS::NUMBER_OF_BARRIERS_B2},
                                                                         {4, BARRIERS::NUMBER_OF_BARRIERS_B4},
                                                                         {8, BARRIERS::NUMBER_OF_BARRIERS_B8},
                                                                         {16, BARRIERS::NUMBER_OF_BARRIERS_B16},
                                                                         {24, BARRIERS::NUMBER_OF_BARRIERS_B24},
                                                                         {32, BARRIERS::NUMBER_OF_BARRIERS_B32}}});
    // Write only the translated enumerator. The raw count must never be cast
    // straight into the field, because the field stores an enumeration value
    // rather than the count itself.
    const BARRIERS numBarriers = barrierLookupArray.lookUp(value);
    interfaceDescriptor.setNumberOfBarriers(numBarriers);
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -100,21 +100,6 @@ bool HwHelperHw<Family>::tilingAllowed(bool isSharedContext, bool isImage1d, boo
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getBarriersCountFromHasBarriers(uint32_t hasBarriers) {
|
||||
static constexpr uint32_t possibleBarriersCounts[] = {
|
||||
0u, // 0
|
||||
1u, // 1
|
||||
2u, // 2
|
||||
4u, // 3
|
||||
8u, // 4
|
||||
16u, // 5
|
||||
24u, // 6
|
||||
32u, // 7
|
||||
};
|
||||
return possibleBarriersCounts[hasBarriers];
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::calculateAvailableThreadCount(PRODUCT_FAMILY family, uint32_t grfCount, uint32_t euCount,
|
||||
uint32_t threadsPerEu) {
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/utilities/lookup_array.h"
|
||||
#include "shared/source/xe_hpg_core/hw_cmds_base.h"
|
||||
|
||||
using Family = NEO::XE_HPG_COREFamily;
|
||||
@@ -102,7 +103,11 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
|
||||
|
||||
template <>
|
||||
void EncodeDispatchKernel<Family>::programBarrierEnable(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo) {
|
||||
interfaceDescriptor.setNumberOfBarriers(static_cast<INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS>(value));
|
||||
using BARRIERS = INTERFACE_DESCRIPTOR_DATA::NUMBER_OF_BARRIERS;
|
||||
static const LookupArray<uint32_t, BARRIERS, 8> barrierLookupArray({{{0, BARRIERS::NUMBER_OF_BARRIERS_NONE},
|
||||
{1, BARRIERS::NUMBER_OF_BARRIERS_B1}}});
|
||||
BARRIERS numBarriers = barrierLookupArray.lookUp(value);
|
||||
interfaceDescriptor.setNumberOfBarriers(numBarriers);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -55,16 +55,26 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenOverrideSlmTotalSizeDebugVari
|
||||
|
||||
HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBarrierEnableThenCorrectValuesAreSet, IsAtLeastXeHpcCore) {
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    INTERFACE_DESCRIPTOR_DATA idd = FamilyType::cmdInitInterfaceDescriptorData;
    MockDevice device;
    auto hwInfo = device.getHardwareInfo();

    // Pairs a barrier count passed to programBarrierEnable with the raw value
    // expected afterwards in the descriptor's NUMBER_OF_BARRIERS field.
    struct BarrierCountToBarrierNumEnum {
        uint32_t barrierCount;
        uint32_t numBarriersEncoding;
    };
    constexpr BarrierCountToBarrierNumEnum barriers[8] = {{0, 0},
                                                          {1, 1},
                                                          {2, 2},
                                                          {4, 3},
                                                          {8, 4},
                                                          {16, 5},
                                                          {24, 6},
                                                          {32, 7}};
    for (auto &[barrierCount, numBarriersEnum] : barriers) {
        EncodeDispatchKernel<FamilyType>::programBarrierEnable(idd, barrierCount, hwInfo);

        // The field must hold the encoding, not the count that was passed in.
        EXPECT_EQ(numBarriersEnum, idd.getNumberOfBarriers());
    }
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user