mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
fix: Correct logic to select internal BCS engine
When BCS3 is not available, use the last available copy engine as the internal engine. Related-To: HSD-18039263936 Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
d7777ef163
commit
46f9133bf2
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2020 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -41,4 +41,12 @@ constexpr uint64_t shiftLeftBy(uint64_t bitPosition) {
|
||||
return (1ull << bitPosition);
|
||||
}
|
||||
|
||||
// Returns the zero-based index of the most significant set bit in `field`.
// The value is shifted right one bit at a time and every shift that leaves a
// non-zero remainder is counted, so inputs 0 and 1 both yield 0.
constexpr uint32_t getMostSignificantSetBitIndex(uint64_t field) {
|
||||
uint32_t index = 0;
|
||||
while (field >>= 1) {
|
||||
index++;
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2023 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -97,6 +97,7 @@ bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType) {
|
||||
aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage) {
|
||||
auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo();
|
||||
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
if (debugManager.flags.ForceBcsEngineIndex.get() != -1) {
|
||||
auto index = debugManager.flags.ForceBcsEngineIndex.get();
|
||||
UNRECOVERABLE_IF(index > 8);
|
||||
@@ -110,11 +111,7 @@ aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceE
|
||||
}
|
||||
|
||||
if (internalUsage) {
|
||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||
return debugManager.flags.ForceBCSForInternalCopyEngine.get() == 0 ? aub_stream::EngineType::ENGINE_BCS
|
||||
: static_cast<aub_stream::EngineType>(aub_stream::EngineType::ENGINE_BCS1 + debugManager.flags.ForceBCSForInternalCopyEngine.get() - 1);
|
||||
}
|
||||
return aub_stream::ENGINE_BCS3;
|
||||
return EngineHelpers::mapBcsIndexToEngineType(gfxCoreHelper.getInternalCopyEngineIndex(hwInfo), true);
|
||||
}
|
||||
|
||||
auto enableSelector = productHelper.isCopyEngineSelectorEnabled(hwInfo);
|
||||
|
||||
@@ -93,6 +93,7 @@ class GfxCoreHelper {
|
||||
bool forceNonAuxMode,
|
||||
bool useL1Cache) const = 0;
|
||||
virtual const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const = 0;
|
||||
virtual uint32_t getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual const StackVec<size_t, 3> getDeviceSubGroupSizes() const = 0;
|
||||
virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0;
|
||||
@@ -289,6 +290,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
|
||||
const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override;
|
||||
|
||||
uint32_t getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override;
|
||||
|
||||
const StackVec<size_t, 3> getDeviceSubGroupSizes() const override;
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/bit_helpers.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
@@ -814,4 +815,15 @@ bool GfxCoreHelperHw<GfxFamily>::usmCompressionSupported(const NEO::HardwareInfo
|
||||
return false;
|
||||
}
|
||||
|
||||
// Selects the copy-engine (BCS) index used for internal operations.
// - When the ForceBCSForInternalCopyEngine debug flag is not -1, its value is returned as-is.
// - Otherwise returns the smaller of 3 and the index of the highest set bit in
//   hwInfo.featureTable.ftrBcsInfo.
// NOTE(review): when a bit above index 3 is set but bit 3 itself is clear, the result is
// still 3; confirm whether such sparse ftrBcsInfo masks can occur.
template <typename Family>
|
||||
uint32_t GfxCoreHelperHw<Family>::getInternalCopyEngineIndex(const HardwareInfo &hwInfo) const {
|
||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||
return debugManager.flags.ForceBCSForInternalCopyEngine.get();
|
||||
}
|
||||
|
||||
// Index 3 is preferred; fall back to the highest set bit when it is lower than 3.
constexpr uint32_t defaultInternalCopyEngineIndex = 3u;
|
||||
auto highestAvailableIndex = getMostSignificantSetBitIndex(hwInfo.featureTable.ftrBcsInfo.to_ullong());
|
||||
return std::min(defaultInternalCopyEngineIndex, highestAvailableIndex);
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -68,16 +68,13 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
|
||||
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal}); // Internal usage
|
||||
}
|
||||
|
||||
uint32_t internalIndex = getInternalCopyEngineIndex(hwInfo);
|
||||
for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
|
||||
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
|
||||
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
|
||||
engines.push_back({engineType, EngineUsage::regular});
|
||||
uint32_t internalIndex = 3;
|
||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||
internalIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
|
||||
}
|
||||
if (i == internalIndex) {
|
||||
engines.push_back({engineType, EngineUsage::internal}); // BCS3 for internal usage
|
||||
engines.push_back({engineType, EngineUsage::internal});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,16 +72,13 @@ const EngineInstancesContainer GfxCoreHelperHw<Family>::getGpgpuEngineInstances(
|
||||
engines.push_back({aub_stream::EngineType::ENGINE_BCS, EngineUsage::internal}); // Internal usage
|
||||
}
|
||||
|
||||
uint32_t internalIndex = getInternalCopyEngineIndex(hwInfo);
|
||||
for (uint32_t i = 1; i < hwInfo.featureTable.ftrBcsInfo.size(); i++) {
|
||||
if (hwInfo.featureTable.ftrBcsInfo.test(i)) {
|
||||
auto engineType = static_cast<aub_stream::EngineType>((i - 1) + aub_stream::ENGINE_BCS1); // Link copy engine
|
||||
engines.push_back({engineType, EngineUsage::regular});
|
||||
uint32_t internalIndex = 3;
|
||||
if (debugManager.flags.ForceBCSForInternalCopyEngine.get() != -1) {
|
||||
internalIndex = debugManager.flags.ForceBCSForInternalCopyEngine.get();
|
||||
}
|
||||
if (i == internalIndex) {
|
||||
engines.push_back({engineType, EngineUsage::internal}); // BCS3 for internal usage
|
||||
engines.push_back({engineType, EngineUsage::internal});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/helpers/bit_helpers.h"
|
||||
#include "shared/source/helpers/constants.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
@@ -197,3 +198,23 @@ TEST(SetBitsTests, givenDifferentValuesWhenTestingSetBitsThenCorrectValueIsRetur
|
||||
EXPECT_EQ(0b0u, setBits(0b1010, false, 0b1010));
|
||||
EXPECT_EQ(0b1010u, setBits(0b1010, true, 0b1010));
|
||||
}
|
||||
|
||||
// Checks getMostSignificantSetBitIndex on three groups of inputs:
// zero and single-bit values, values with extra lower bits set (index unchanged),
// and maxNBitValue(n) inputs, for which index n - 1 is expected.
TEST(GetMsbIndexTests, givenDifferentValuesWhenTestingGetMostSignificantSetBitIndexThenCorrectValueIsReturned) {
|
||||
EXPECT_EQ(0u, getMostSignificantSetBitIndex(0b0));
|
||||
EXPECT_EQ(0u, getMostSignificantSetBitIndex(0b1));
|
||||
EXPECT_EQ(1u, getMostSignificantSetBitIndex(0b10));
|
||||
EXPECT_EQ(2u, getMostSignificantSetBitIndex(0b100));
|
||||
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1000));
|
||||
|
||||
// Lower bits must not affect the reported index.
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1001));
|
||||
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1010));
|
||||
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1100));
|
||||
EXPECT_EQ(3u, getMostSignificantSetBitIndex(0b1101));
|
||||
|
||||
EXPECT_EQ(6u, getMostSignificantSetBitIndex(maxNBitValue(7)));
|
||||
EXPECT_EQ(7u, getMostSignificantSetBitIndex(maxNBitValue(8)));
|
||||
EXPECT_EQ(8u, getMostSignificantSetBitIndex(maxNBitValue(9)));
|
||||
EXPECT_EQ(16u, getMostSignificantSetBitIndex(maxNBitValue(17)));
|
||||
EXPECT_EQ(32u, getMostSignificantSetBitIndex(maxNBitValue(33)));
|
||||
EXPECT_EQ(63u, getMostSignificantSetBitIndex(maxNBitValue(64)));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -163,7 +163,7 @@ TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettin
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
DeviceBitfield deviceBitfield = 0b11;
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b111;
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b1111;
|
||||
auto isInternalUsage = true;
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage));
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -64,6 +64,68 @@ HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenForceBCSForInternalCopyEngineW
|
||||
}
|
||||
}
|
||||
|
||||
// With ftrBcsInfo set to 0b1, 0b11 and 0b111 (bit 3 never set), expects
// EngineHelpers::getBcsEngineType called with internalUsage == true to return
// ENGINE_BCS, ENGINE_BCS1 and ENGINE_BCS2 respectively.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenLessThanFourCopyEnginesWhenGetBcsEngineTypeForInternalEngineThenReturnLastAvailableEngine, IsAtLeastXeHpcCore) {
|
||||
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();
|
||||
DeviceBitfield deviceBitfield = 0xff;
|
||||
|
||||
{
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b1;
|
||||
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, engineType);
|
||||
}
|
||||
{
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b11;
|
||||
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS1, engineType);
|
||||
}
|
||||
{
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b111;
|
||||
auto engineType = EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, true);
|
||||
EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS2, engineType);
|
||||
}
|
||||
}
|
||||
|
||||
// With ftrBcsInfo set to 0b1, 0b11 and 0b111 (bit 3 never set), expects the container
// returned by getGpgpuEngineInstances to hold an internal-usage entry for ENGINE_BCS,
// ENGINE_BCS1 and ENGINE_BCS2 respectively, and never one for ENGINE_BCS3.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenLessThanFourCopyEnginesWhenGetGpgpuEngineInstancesThenUseLastCopyEngineAsInternal, IsAtLeastXeHpcCore) {
|
||||
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
|
||||
hwInfo.capabilityTable.blitterOperationsSupported = true;
|
||||
auto &gfxCoreHelper = pDevice->getGfxCoreHelper();
|
||||
auto &productHelper = rootDeviceEnvironment.getProductHelper();
|
||||
|
||||
// Returns true when `engines` holds an entry with the given engine type and EngineUsage::internal.
auto hasInternalEngine = [](const EngineInstancesContainer &engines, aub_stream::EngineType expectedEngineType) {
|
||||
for (auto &[engineType, engineUsage] : engines) {
|
||||
if (engineType == expectedEngineType && engineUsage == EngineUsage::internal) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
{
|
||||
// The single-engine case is checked only when the product's default copy engine is ENGINE_BCS.
if (aub_stream::EngineType::ENGINE_BCS == productHelper.getDefaultCopyEngine()) {
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b1;
|
||||
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
|
||||
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS));
|
||||
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
|
||||
}
|
||||
}
|
||||
{
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b11;
|
||||
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
|
||||
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS1));
|
||||
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
|
||||
}
|
||||
{
|
||||
hwInfo.featureTable.ftrBcsInfo = 0b111;
|
||||
auto &engines = gfxCoreHelper.getGpgpuEngineInstances(rootDeviceEnvironment);
|
||||
EXPECT_TRUE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS2));
|
||||
EXPECT_FALSE(hasInternalEngine(engines, aub_stream::EngineType::ENGINE_BCS3));
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngineAssignWhenSelectLinkCopyEngineThenRoundRobinOverAllAvailableLinkedCopyEngines, IsAtLeastXeHpCore) {
|
||||
DebugManagerStateRestore restore;
|
||||
debugManager.flags.EnableCmdQRoundRobindBcsEngineAssign.set(1u);
|
||||
|
||||
Reference in New Issue
Block a user