fix: Correct logic to select internal BCS engine

When BCS3 is not available, use the last available copy engine as the internal engine.

Related-To: HSD-18039263936

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski
2024-07-17 21:39:08 +00:00
committed by Compute-Runtime-Automation
parent d7777ef163
commit 46f9133bf2
9 changed files with 118 additions and 21 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2020 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -41,4 +41,12 @@ constexpr uint64_t shiftLeftBy(uint64_t bitPosition) {
return (1ull << bitPosition);
}
// Returns the zero-based position of the highest set bit in `field`.
// An input of 0 yields 0, the same result as an input of 1.
constexpr uint32_t getMostSignificantSetBitIndex(uint64_t field) {
    uint32_t position = 0;
    // Count how many right shifts are needed before no set bits remain above bit 0.
    for (uint64_t remaining = field >> 1; remaining != 0; remaining >>= 1) {
        ++position;
    }
    return position;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2023 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -97,6 +97,7 @@ bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType) {
aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage) {
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
if (debugManager.flags.ForceBcsEngineIndex.get() != -1) {
auto index = debugManager.flags.ForceBcsEngineIndex.get();
UNRECOVERABLE_IF(index > 8);
@@ -110,11 +111,7 @@ aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceE
}
if (internalUsage) {
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
return debugManager.flags.ForceBCSForInternalCopyEngine.get() == 0 ? aub_stream::EngineType::ENGINE_BCS
: static_cast<aub_stream::EngineType>(aub_stream::EngineType::ENGINE_BCS1 + debugManager.flags.ForceBCSForInternalCopyEngine.get() - 1);
}
return aub_stream::ENGINE_BCS3;
return EngineHelpers::mapBcsIndexToEngineType(gfxCoreHelper.getInternalCopyEngineIndex(hwInfo), true);
}
auto enableSelector = productHelper.isCopyEngineSelectorEnabled(hwInfo);

View File

@@ -93,6 +93,7 @@ class GfxCoreHelper {
bool forceNonAuxMode,
bool useL1Cache) const = 0;
virtual const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
virtual uint32_t getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const = 0;
virtual EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const = 0;
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
@@ -289,6 +290,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
uint32_t getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const override;
EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override;
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;

View File

@@ -13,6 +13,7 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/bit_helpers.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
@@ -814,4 +815,15 @@ bool GfxCoreHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo
return false;
}
// Selects the copy-engine (BCS) index to be used for internal operations.
// A ForceBCSForInternalCopyEngine debug value other than -1 is returned as-is.
// Otherwise the result is the default index 3, lowered to the highest bit set in
// hwInfo.featureTable.ftrBcsInfo when that bit is below 3.
// NOTE(review): a mask with bit 3 clear and a higher bit set still yields 3 -
// confirm such masks cannot occur.
template <typename Family>
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
    constexpr uint32_t preferredIndex = 3u;
    const auto forcedIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
    if (forcedIndex == -1) {
        const uint32_t topAvailableIndex = getMostSignificantSetBitIndex(hwInfo.featureTable.ftrBcsInfo.to_ullong());
        return topAvailableIndex < preferredIndex ? topAvailableIndex : preferredIndex;
    }
    return forcedIndex;
}
} // namespace NEO

View File

@@ -68,16 +68,13 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal}); // Internal usage
}
uint32_t internalIndex = getInternalCopyEngineIndex(hwInfo);
for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
engines.push_back({engineType, EngineUsage::regular});
uint32_t internalIndex = 3;
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
internalIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
}
if (i == internalIndex) {
engines.push_back({engineType, EngineUsage::internal}); // BCS3 for internal usage
engines.push_back({engineType, EngineUsage::internal});
}
}
}

View File

@@ -72,16 +72,13 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal}); // Internal usage
}
uint32_t internalIndex = getInternalCopyEngineIndex(hwInfo);
for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
engines.push_back({engineType, EngineUsage::regular});
uint32_t internalIndex = 3;
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
internalIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
}
if (i == internalIndex) {
engines.push_back({engineType, EngineUsage::internal}); // BCS3 for internal usage
engines.push_back({engineType, EngineUsage::internal});
}
}
}

View File

@@ -1,11 +1,12 @@
/*
* Copyright (C) 2019-2021 Intel Corporation
* Copyright (C) 2019-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/helpers/bit_helpers.h"
#include "shared/source/helpers/constants.h"
#include "gtest/gtest.h"
@@ -197,3 +198,23 @@ TEST(SetBitsTests, givenDifferentValuesWhenTestingSetBitsThenCorrectValueIsRetur
EXPECT_EQ(0b0u, setBits(0b1010, false, 0b1010));
EXPECT_EQ(0b1010u, setBits(0b1010, true, 0b1010));
}
// Verifies getMostSignificantSetBitIndex for zero, single-bit, multi-bit and wide inputs.
TEST(GetMsbIndexTests, givenDifferentValuesWhenTestingGetMostSignificantSetBitIndexThenCorrectValueIsReturned) {
// Zero has no set bit; index 0 is expected, the same as for 0b1.
EXPECT_EQ(0u, getMostSignificantSetBitIndex(0b0));
// Single set bit: the index equals the bit position.
EXPECT_EQ(0u, getMostSignificantSetBitIndex(0b1));
EXPECT_EQ(1u, getMostSignificantSetBitIndex(0b10));
EXPECT_EQ(2u, getMostSignificantSetBitIndex(0b100));
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1000));
// Additional lower bits must not change the reported index.
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1001));
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1010));
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1100));
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1101));
// maxNBitValue(n) is defined outside this file; presumably it yields a value with the n lowest
// bits set, so index n - 1 is expected, up to the full 64-bit width.
EXPECT_EQ(6u, getMostSignificantSetBitIndex(maxNBitValue(7)));
EXPECT_EQ(7u, getMostSignificantSetBitIndex(maxNBitValue(8)));
EXPECT_EQ(8u, getMostSignificantSetBitIndex(maxNBitValue(9)));
EXPECT_EQ(16u, getMostSignificantSetBitIndex(maxNBitValue(17)));
EXPECT_EQ(32u, getMostSignificantSetBitIndex(maxNBitValue(33)));
EXPECT_EQ(63u, getMostSignificantSetBitIndex(maxNBitValue(64)));
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -163,7 +163,7 @@ TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettin
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
DeviceBitfield deviceBitfield = 0b11;
hwInfo.featureTable.ftrBcsInfo = 0b111;
hwInfo.featureTable.ftrBcsInfo = 0b1111;
auto isInternalUsage = true;
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2023 Intel Corporation
* Copyright (C) 2021-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -64,6 +64,68 @@ HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenForceBCSForInternalCopyEngineW
}
}
// With ftrBcsInfo masks that expose fewer than four copy engines, getBcsEngineType called for
// internal usage is expected to return the engine matching the highest set bit of the mask.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenLessThanFourCopyEnginesWhenGetBcsEngineTypeForInternalEngineThenReturnLastAvailableEngine, IsAtLeastXeHpcCore) {
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
// The hardware info is modified in place; each scoped case below overwrites ftrBcsInfo.
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
hwInfo.capabilityTable.blitterOperationsSupported = true;
auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();
DeviceBitfield deviceBitfield = 0xff;
{
// Only bit 0 set: ENGINE_BCS is expected.
hwInfo.featureTable.ftrBcsInfo = 0b1;
// The trailing `true` is the internalUsage argument.
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, engineType);
}
{
// Bits 0-1 set: ENGINE_BCS1 is expected.
hwInfo.featureTable.ftrBcsInfo = 0b11;
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, engineType);
}
{
// Bits 0-2 set: ENGINE_BCS2 is expected.
hwInfo.featureTable.ftrBcsInfo = 0b111;
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, engineType);
}
}
// With ftrBcsInfo masks that expose fewer than four copy engines, the engine list returned by
// getGpgpuEngineInstances is expected to contain an internal-usage entry for the engine matching
// the highest set bit of the mask, and no internal-usage entry for ENGINE_BCS3.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenLessThanFourCopyEnginesWhenGetGpgpuEngineInstancesThenUseLastCopyEngineAsInternal, IsAtLeastXeHpcCore) {
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
// The hardware info is modified in place; each scoped case below overwrites ftrBcsInfo.
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
hwInfo.capabilityTable.blitterOperationsSupported = true;
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
auto &productHelper = rootDeviceEnvironment.getProductHelper();
// Returns true when `engines` holds an entry of the given type whose usage is EngineUsage::internal.
auto hasInternalEngine = [](const EngineInstancesContainer &engines, aub_stream::EngineType expectedEngineType) {
for (auto &[engineType, engineUsage] : engines) {
if (engineType == expectedEngineType && engineUsage == EngineUsage::internal) {
return true;
}
}
return false;
};
{
// The single-engine case is checked only when the product's default copy engine is ENGINE_BCS.
if (aub_stream::EngineType::ENGINE_BCS == productHelper.getDefaultCopyEngine()) {
hwInfo.featureTable.ftrBcsInfo = 0b1;
// getGpgpuEngineInstances is declared as returning a const container by value, so `auto &`
// deduces a const reference bound to the returned temporary.
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS));
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
}
}
{
// Bits 0-1 set: ENGINE_BCS1 is expected to be the internal engine.
hwInfo.featureTable.ftrBcsInfo = 0b11;
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS1));
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
}
{
// Bits 0-2 set: ENGINE_BCS2 is expected to be the internal engine.
hwInfo.featureTable.ftrBcsInfo = 0b111;
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS2));
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
}
}
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEngines, IsAtLeastXeHpCore) {
DebugManagerStateRestore restore;
debugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u);