feature: add support for HP copy engine context

- add support for context group with HP copy engine
- choose HP copy engine when available

Related-To: NEO-11983

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-07-19 10:20:37 +00:00
committed by Compute-Runtime-Automation
parent 4fc37f9afd
commit b6299b8a21
13 changed files with 510 additions and 46 deletions

View File

@@ -364,6 +364,33 @@ bool Device::createEngines() {
}
if (gfxCoreHelper.areSecondaryContextsSupported()) {
// Populates the context group of one primary engine: resets the group's counters,
// appends the primary engine to the group's engine list and requests a secondary
// engine for each of the remaining (contextCount - 1) slots. Slots whose index is at
// or beyond (contextCount - highPriorityContextCount) are requested as high priority.
// regularPriorityCount is accepted but not read; the regular total is derived from
// contextCount and highPriorityContextCount.
auto createSecondaryContext = [this](const EngineControl &primaryEngine, SecondaryContexts &secondaryEnginesForType, uint32_t contextCount, uint32_t regularPriorityCount, uint32_t highPriorityContextCount) {
    const uint32_t firstHighPrioritySlot = contextCount - highPriorityContextCount;

    // Pool sizes first, then the counters that track hand-outs from each pool.
    secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
    secondaryEnginesForType.regularEnginesTotal = firstHighPrioritySlot;
    secondaryEnginesForType.highPriorityCounter = 0;
    secondaryEnginesForType.regularCounter = 0;
    secondaryEnginesForType.assignedContextsCounter = 1;

    // Secondary engines start out with the primary engine's type and usage.
    NEO::EngineTypeUsage engineTypeUsage;
    engineTypeUsage.second = primaryEngine.getEngineUsage();
    engineTypeUsage.first = primaryEngine.getEngineType();

    const bool usageIsSupported = (engineTypeUsage.second == EngineUsage::regular) || (engineTypeUsage.second == EngineUsage::highPriority);
    UNRECOVERABLE_IF(!usageIsSupported);

    secondaryEnginesForType.engines.push_back(primaryEngine);

    uint32_t slot = 1;
    while (slot < contextCount) {
        // Once the high-priority range is reached the usage stays high priority.
        if (slot >= firstHighPrioritySlot) {
            engineTypeUsage.second = EngineUsage::highPriority;
        }
        this->createSecondaryEngine(primaryEngine.commandStreamReceiver, engineTypeUsage);
        slot++;
    }

    primaryEngine.osContext->setContextGroup(true);
};
for (auto engineGroupType : {EngineGroupType::compute, EngineGroupType::copy, EngineGroupType::linkedCopy}) {
auto engineGroup = tryGetRegularEngineGroup(engineGroupType);
@@ -372,7 +399,7 @@ bool Device::createEngines() {
}
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
auto highPriorityContextCount = std::min(contextCount / 2, 4u);
auto highPriorityContextCount = gfxCoreHelper.getContextGroupHpContextsCount(engineGroupType);
if (debugManager.flags.OverrideNumHighPriorityContexts.get() != -1) {
highPriorityContextCount = static_cast<uint32_t>(debugManager.flags.OverrideNumHighPriorityContexts.get());
@@ -390,27 +417,23 @@ bool Device::createEngines() {
auto primaryEngine = engineGroup->engines[engineIndex];
secondaryEnginesForType.regularEnginesTotal = contextCount - highPriorityContextCount;
secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
secondaryEnginesForType.regularCounter = 0;
secondaryEnginesForType.highPriorityCounter = 0;
secondaryEnginesForType.assignedContextsCounter = 1;
createSecondaryContext(primaryEngine, secondaryEnginesForType, contextCount, contextCount - highPriorityContextCount, highPriorityContextCount);
}
}
NEO::EngineTypeUsage engineTypeUsage;
engineTypeUsage.first = primaryEngine.getEngineType();
auto hpCopyEngine = getHpCopyEngine();
if (hpCopyEngine) {
auto engineType = hpCopyEngine->getEngineType();
if ((static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0) {
secondaryEnginesForType.engines.push_back(primaryEngine);
UNRECOVERABLE_IF(secondaryEngines.find(engineType) != secondaryEngines.end());
auto &secondaryEnginesForType = secondaryEngines[engineType];
for (uint32_t i = 1; i < contextCount; i++) {
engineTypeUsage.second = EngineUsage::regular;
auto primaryEngine = *hpCopyEngine;
if (i >= contextCount - highPriorityContextCount) {
engineTypeUsage.second = EngineUsage::highPriority;
}
createSecondaryEngine(primaryEngine.commandStreamReceiver, engineTypeUsage);
}
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
primaryEngine.osContext->setContextGroup(true);
createSecondaryContext(primaryEngine, secondaryEnginesForType, contextCount, 0, contextCount);
}
}
}
@@ -468,7 +491,7 @@ bool Device::createEngine(EngineTypeUsage engineTypeUsage) {
primaryEngineTypeAllowed &= (static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0;
}
const bool isPrimaryEngine = primaryEngineTypeAllowed && (engineUsage == EngineUsage::regular);
const bool isPrimaryEngine = primaryEngineTypeAllowed && (engineUsage == EngineUsage::regular || engineUsage == EngineUsage::highPriority);
const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported();
UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported);
@@ -881,6 +904,19 @@ EngineControl *Device::getInternalCopyEngine() {
return nullptr;
}
// Looks up the high-priority copy engine of this device.
// Returns a pointer to the first entry of allEngines whose OS context reports a BCS
// engine type and high priority. Returns nullptr when the hardware info reports that
// blitter operations are not supported, or when no entry matches.
EngineControl *Device::getHpCopyEngine() {
    const bool blitterSupported = getHardwareInfo().capabilityTable.blitterOperationsSupported;
    if (blitterSupported) {
        for (auto &candidate : allEngines) {
            if (!NEO::EngineHelpers::isBcs(candidate.osContext->getEngineType())) {
                continue;
            }
            if (candidate.osContext->isHighPriority()) {
                return &candidate;
            }
        }
    }
    return nullptr;
}
RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
if (rtDispatchGlobalsInfos.size() == 0) {
return nullptr;
@@ -1204,7 +1240,9 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
std::lock_guard<std::mutex> guard(mutex);
if (usage == EngineUsage::highPriority) {
if (highPriorityEnginesTotal == 0) {
return nullptr;
}
// Use index from reserved HP pool
if (hpIndices.size() < highPriorityEnginesTotal) {
secondaryEngineIndex = (highPriorityCounter.fetch_add(1)) % (highPriorityEnginesTotal);
@@ -1228,6 +1266,9 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
}
} else if (usage == EngineUsage::regular) {
if (regularEnginesTotal == 0) {
return nullptr;
}
if (npIndices.size() == 0) {
regularCounter.fetch_add(1);
npIndices.push_back(secondaryEngineIndex);

View File

@@ -118,6 +118,7 @@ class Device : public ReferenceTrackedObject<Device> {
EngineControl &getNextEngineForCommandQueue();
EngineControl &getInternalEngine();
EngineControl *getInternalCopyEngine();
EngineControl *getHpCopyEngine();
SelectorCopyEngine &getSelectorCopyEngine();
MemoryManager *getMemoryManager() const;
GmmHelper *getGmmHelper() const;

View File

@@ -256,12 +256,21 @@ aub_stream::EngineType selectLinkCopyEngine(const RootDeviceEnvironment &rootDev
: aub_stream::ENGINE_BCS4;
const aub_stream::EngineType engine2 = aub_stream::ENGINE_BCS2;
if (isBcsEnabled(hwInfo, engine1) && isBcsEnabled(hwInfo, engine2)) {
auto hpEngine = gfxCoreHelper.getDefaultHpCopyEngine(hwInfo);
if (isBcsEnabled(hwInfo, engine1) && engine1 != hpEngine &&
isBcsEnabled(hwInfo, engine2) && engine2 != hpEngine) {
// both BCS enabled, round robin
return selectorCopyEngine.fetch_xor(1u) ? engine1 : engine2;
} else {
// one BCS enabled
return isBcsEnabled(hwInfo, engine1) ? engine1 : engine2;
if (isBcsEnabled(hwInfo, engine1) && (engine1 != hpEngine)) {
return engine1;
} else if (isBcsEnabled(hwInfo, engine2) && (engine2 != hpEngine)) {
return engine2;
} else {
return productHelper.getDefaultCopyEngine();
}
}
}
aub_stream::EngineType mapCcsIndexToEngineType(uint32_t index) {

View File

@@ -182,6 +182,8 @@ class GfxCoreHelper {
virtual bool areSecondaryContextsSupported() const = 0;
virtual uint32_t getContextGroupContextsCount() const = 0;
virtual uint32_t getContextGroupHpContextsCount(EngineGroupType type) const = 0;
virtual aub_stream::EngineType getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const = 0;
virtual bool is48ResourceNeededForCmdBuffer() const = 0;
virtual uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const = 0;
@@ -418,6 +420,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
bool areSecondaryContextsSupported() const override;
uint32_t getContextGroupContextsCount() const override;
uint32_t getContextGroupHpContextsCount(EngineGroupType type) const override;
aub_stream::EngineType getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const override;
bool is48ResourceNeededForCmdBuffer() const override;

View File

@@ -748,6 +748,17 @@ uint32_t GfxCoreHelperHw<GfxFamily>::getContextGroupContextsCount() const {
return 0;
}
// Number of high-priority contexts in a context group: half of the group size
// (integer division), capped at 4. The engine group type argument is not consulted
// by this implementation.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getContextGroupHpContextsCount(EngineGroupType type) const {
    constexpr uint32_t maxHpContexts = 4u;
    const uint32_t halfOfGroup = getContextGroupContextsCount() / 2;
    return (halfOfGroup < maxHpContexts) ? halfOfGroup : maxHpContexts;
}
// Generic implementation: hwInfo is not consulted and the NUM_ENGINES enumerator is
// returned. NOTE(review): callers appear to treat NUM_ENGINES as "no dedicated HP copy
// engine" - confirm against the per-platform specializations.
template <typename GfxFamily>
aub_stream::EngineType GfxCoreHelperHw<GfxFamily>::getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const {
    constexpr auto defaultHpCopyEngine = aub_stream::EngineType::NUM_ENGINES;
    return defaultHpCopyEngine;
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::is48ResourceNeededForCmdBuffer() const {
return true;

View File

@@ -1376,30 +1376,52 @@ HWTEST_F(DeviceTests, givenDebugFlagSetWhenCreatingSecondaryEnginesThenCreateCor
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS;
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto &engineGroups = device->getRegularEngineGroups();
{
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto &engineGroups = device->getRegularEngineGroups();
auto engineGroupType = EngineGroupType::compute;
size_t computeEnginesCount = 0;
for (const auto &engine : engineGroups) {
if (engine.engineGroupType == engineGroupType) {
computeEnginesCount = engine.engines.size();
auto engineGroupType = EngineGroupType::compute;
size_t computeEnginesCount = 0;
for (const auto &engine : engineGroups) {
if (engine.engineGroupType == engineGroupType) {
computeEnginesCount = engine.engines.size();
}
}
if (computeEnginesCount == 0) {
GTEST_SKIP();
}
ASSERT_EQ(computeEnginesCount, device->secondaryEngines.size());
ASSERT_EQ(contextGroupSize, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size());
constexpr uint32_t regularContextCount = contextGroupSize - numHighPriorityContexts;
auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(0)];
EXPECT_EQ(regularContextCount, secondaryEngines.regularEnginesTotal);
EXPECT_EQ(contextGroupSize - regularContextCount, secondaryEngines.highPriorityEnginesTotal);
}
{
debugManager.flags.OverrideNumHighPriorityContexts.set(0);
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
if (computeEnginesCount == 0) {
GTEST_SKIP();
ASSERT_EQ(contextGroupSize, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size());
auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(0)];
EXPECT_EQ(nullptr, secondaryEngines.getEngine(EngineUsage::highPriority));
}
{
debugManager.flags.OverrideNumHighPriorityContexts.set(contextGroupSize);
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
ASSERT_EQ(computeEnginesCount, device->secondaryEngines.size());
ASSERT_EQ(contextGroupSize, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size());
ASSERT_EQ(contextGroupSize, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size());
constexpr uint32_t regularContextCount = contextGroupSize - numHighPriorityContexts;
auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(0)];
auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(0)];
EXPECT_EQ(regularContextCount, secondaryEngines.regularEnginesTotal);
EXPECT_EQ(contextGroupSize - regularContextCount, secondaryEngines.highPriorityEnginesTotal);
EXPECT_EQ(nullptr, secondaryEngines.getEngine(EngineUsage::regular));
}
}
HWTEST_F(DeviceTests, givenContextGroupEnabledWhenGettingSecondaryEngineThenResourcesAndContextAreInitialized) {
@@ -1575,18 +1597,21 @@ HWTEST_F(DeviceTests, givenCopyEnginesWhenCreatingSecondaryContextsThenUseCopyTy
auto &enabledEngines = gfxCoreHelper.getGpgpuEngineInstances(device->getRootDeviceEnvironment());
for (auto engineType : {aub_stream::EngineType::ENGINE_BCS, aub_stream::EngineType::ENGINE_BCS1, aub_stream::EngineType::ENGINE_BCS2, aub_stream::EngineType::ENGINE_BCS3}) {
auto supported = std::find_if(enabledEngines.begin(), enabledEngines.end(),
[&engineType](const auto &engine) { return (engine.first == engineType) && (engine.second == EngineUsage::regular); }) != enabledEngines.end();
auto supportedRegular = std::find_if(enabledEngines.begin(), enabledEngines.end(),
[&engineType](const auto &engine) { return (engine.first == engineType) && (engine.second == EngineUsage::regular); }) != enabledEngines.end();
auto supportedHp = std::find_if(enabledEngines.begin(), enabledEngines.end(),
[&engineType](const auto &engine) { return (engine.first == engineType) && (engine.second == EngineUsage::highPriority); }) != enabledEngines.end();
if (supported) {
if (supportedRegular || supportedHp) {
auto usage = supportedRegular ? EngineUsage::regular : EngineUsage::highPriority;
EXPECT_NE(device->secondaryEngines.end(), device->secondaryEngines.find(engineType));
EXPECT_EQ(5u, device->secondaryEngines[engineType].engines.size());
auto engine = device->getSecondaryEngineCsr({engineType, EngineUsage::regular}, false);
auto engine = device->getSecondaryEngineCsr({engineType, usage}, false);
ASSERT_NE(nullptr, engine);
auto csr = engine->commandStreamReceiver;
auto engine2 = device->getSecondaryEngineCsr({engineType, EngineUsage::regular}, false);
auto engine2 = device->getSecondaryEngineCsr({engineType, usage}, false);
ASSERT_NE(nullptr, engine2);
auto csr2 = engine2->commandStreamReceiver;
@@ -1635,6 +1660,36 @@ HWTEST_F(DeviceTests, givenDebugFlagSetWhenCreatingSecondaryEnginesThenSkipSelec
executionEnvironment->decRefInternal();
}
// Checks that clearing the HP copy engine's bit in SecondaryContextEngineTypeMask
// still yields an HP copy engine on the device, but no secondaryEngines entry for
// that engine type. Skipped when the helper reports NUM_ENGINES as the HP copy engine.
HWTEST_F(DeviceTests, givenHpCopyEngineAndDebugFlagSetWhenCreatingSecondaryEnginesThenSkipSelectedEngineTypes) {
    HardwareInfo hwInfo = *defaultHwInfo;
    DebugManagerStateRestore dbgRestorer;

    debugManager.flags.ContextGroupSize.set(5);

    // Hardware description used for the execution environment created below.
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS;
    hwInfo.featureTable.flags.ftrCCSNode = true;
    hwInfo.featureTable.ftrBcsInfo = 0b111;
    hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;

    auto environment = std::unique_ptr<ExecutionEnvironment>(NEO::MockDevice::prepareExecutionEnvironment(&hwInfo, 0u));
    const auto &coreHelper = environment->rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>();

    auto hpCopyEngineType = coreHelper.getDefaultHpCopyEngine(hwInfo);
    if (hpCopyEngineType == aub_stream::EngineType::NUM_ENGINES) {
        GTEST_SKIP();
    }

    // Allow every engine type except the HP copy engine.
    uint32_t hpCopyEngineBit = 1 << static_cast<uint32_t>(hpCopyEngineType);
    debugManager.flags.SecondaryContextEngineTypeMask.set(~hpCopyEngineBit);

    auto device = std::unique_ptr<MockDevice>(MockDevice::createWithExecutionEnvironment<MockDevice>(&hwInfo, environment.release(), 0));

    EXPECT_NE(nullptr, device->getHpCopyEngine());
    EXPECT_EQ(device->secondaryEngines.end(), device->secondaryEngines.find(hpCopyEngineType));
}
TEST_F(DeviceTests, GivenDebuggingEnabledWhenDeviceIsInitializedThenL0DebuggerIsCreated) {
auto executionEnvironment = MockDevice::prepareExecutionEnvironment(defaultHwInfo.get(), 0u);
executionEnvironment->setDebuggingMode(NEO::DebuggingMode::online);

View File

@@ -321,3 +321,88 @@ HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenEnableCmdQRoundRobindBcsEngine
}
}
}
// With ftrBcsInfo set to 0b10010, two consecutive selectLinkCopyEngine calls must each
// return an engine different from the helper's default HP copy engine. Skipped when
// the helper reports NUM_ENGINES as the HP copy engine.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenHpCopyEngineWhenSelectLinkCopyEngineThenHpEngineIsNotSelected, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restore;
    debugManager.flags.ContextGroupSize.set(8);

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
    hwInfo.featureTable.ftrBcsInfo = 0b10010;

    auto hpEngine = pDevice->getGfxCoreHelper().getDefaultHpCopyEngine(hwInfo);
    if (hpEngine == aub_stream::EngineType::NUM_ENGINES) {
        GTEST_SKIP();
    }

    DeviceBitfield deviceBitfield = 0b1;
    auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();

    // Two back-to-back selections, each checked against the HP engine.
    for (int attempt = 0; attempt < 2; ++attempt) {
        auto selectedEngine = EngineHelpers::selectLinkCopyEngine(pDevice->getRootDeviceEnvironment(), deviceBitfield, selectorCopyEngine.selector);
        EXPECT_NE(hpEngine, selectedEngine);
    }
}
// With ftrBcsInfo set to 0b10111, two consecutive selectLinkCopyEngine calls must never
// return the HP copy engine. When the product's default copy engine is not BCS1 the
// calls are expected to return BCS2 and then BCS1; otherwise both calls are expected
// to return the same engine. Skipped when the helper reports NUM_ENGINES.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenHpCopyEngineAndBcs0And1And2RegularEnginesWhenDefaultCopyIsNotBcs1ThenHpEngineIsNotSelectedAndDifferentEnginesAreReturned, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restore;
    debugManager.flags.ContextGroupSize.set(8);

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
    hwInfo.featureTable.ftrBcsInfo = 0b10111;

    auto hpEngine = pDevice->getGfxCoreHelper().getDefaultHpCopyEngine(hwInfo);
    if (hpEngine == aub_stream::EngineType::NUM_ENGINES) {
        GTEST_SKIP();
    }

    DeviceBitfield deviceBitfield = 0b1;
    auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();

    auto firstSelection = EngineHelpers::selectLinkCopyEngine(pDevice->getRootDeviceEnvironment(), deviceBitfield, selectorCopyEngine.selector);
    EXPECT_NE(hpEngine, firstSelection);

    auto secondSelection = EngineHelpers::selectLinkCopyEngine(pDevice->getRootDeviceEnvironment(), deviceBitfield, selectorCopyEngine.selector);
    EXPECT_NE(hpEngine, secondSelection);

    auto &productHelper = rootDeviceEnvironment.getProductHelper();
    const bool defaultCopyIsBcs1 = (aub_stream::ENGINE_BCS1 == productHelper.getDefaultCopyEngine());
    if (defaultCopyIsBcs1) {
        EXPECT_EQ(firstSelection, secondSelection);
    } else {
        EXPECT_NE(firstSelection, secondSelection);
        EXPECT_EQ(aub_stream::ENGINE_BCS2, firstSelection);
        EXPECT_EQ(aub_stream::ENGINE_BCS1, secondSelection);
    }
}
// With ftrBcsInfo set to 0b00111, two consecutive selectLinkCopyEngine calls must both
// return BCS1 and never the helper's default HP copy engine. Skipped when the helper
// reports NUM_ENGINES as the HP copy engine.
HWTEST2_F(EngineNodeHelperTestsXeHPAndLater, givenBcs2HpCopyEngineAndBcs0And1RegularEnginesWhenSelectingLinkCopyEngineThenBcs1IsSelected, IsAtLeastXeHpCore) {
    DebugManagerStateRestore restore;
    debugManager.flags.ContextGroupSize.set(8);

    auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
    auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo();
    hwInfo.featureTable.ftrBcsInfo = 0b00111;

    auto hpEngine = pDevice->getGfxCoreHelper().getDefaultHpCopyEngine(hwInfo);
    if (hpEngine == aub_stream::EngineType::NUM_ENGINES) {
        GTEST_SKIP();
    }

    DeviceBitfield deviceBitfield = 0b1;
    auto &selectorCopyEngine = pDevice->getNearestGenericSubDevice(0)->getSelectorCopyEngine();

    auto firstSelection = EngineHelpers::selectLinkCopyEngine(pDevice->getRootDeviceEnvironment(), deviceBitfield, selectorCopyEngine.selector);
    EXPECT_NE(hpEngine, firstSelection);

    auto secondSelection = EngineHelpers::selectLinkCopyEngine(pDevice->getRootDeviceEnvironment(), deviceBitfield, selectorCopyEngine.selector);
    EXPECT_NE(hpEngine, secondSelection);

    // Both selections must agree, and the agreed engine must be BCS1.
    EXPECT_EQ(firstSelection, secondSelection);
    EXPECT_EQ(aub_stream::ENGINE_BCS1, secondSelection);
}