feature: add support for HP copy engine context

- add support for context group with HP copy engine
- choose HP copy engine when available

Related-To: NEO-11983

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-07-18 14:47:32 +00:00
committed by Compute-Runtime-Automation
parent ce6e7f63af
commit 3fbcbcaef2
13 changed files with 456 additions and 46 deletions

View File

@@ -364,6 +364,33 @@ bool Device::createEngines() {
}
if (gfxCoreHelper.areSecondaryContextsSupported()) {
// Sets up the context group that hangs off a single primary engine.
//  - primaryEngine: engine that becomes entry 0 of the group; its usage must be
//    regular or highPriority.
//  - secondaryEnginesForType: bookkeeping object whose totals/counters are reset here.
//  - contextCount: total number of contexts in the group, primary included.
//  - highPriorityContextCount: how many of the trailing contexts are high priority.
// NOTE(review): regularPriorityCount is accepted but never read; the regular total is
// derived as contextCount - highPriorityContextCount instead.
auto createSecondaryContext = [this](const EngineControl &primaryEngine, SecondaryContexts &secondaryEnginesForType, uint32_t contextCount, uint32_t regularPriorityCount, uint32_t highPriorityContextCount) {
    // Index of the first context that has to be created as high priority.
    const uint32_t firstHighPriorityIndex = contextCount - highPriorityContextCount;

    secondaryEnginesForType.regularEnginesTotal = firstHighPriorityIndex;
    secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
    secondaryEnginesForType.regularCounter = 0;
    secondaryEnginesForType.highPriorityCounter = 0;
    // The primary engine pushed below accounts for the first assigned context.
    secondaryEnginesForType.assignedContextsCounter = 1;

    NEO::EngineTypeUsage secondaryUsage;
    secondaryUsage.first = primaryEngine.getEngineType();
    secondaryUsage.second = primaryEngine.getEngineUsage();

    const bool usageSupported = (secondaryUsage.second == EngineUsage::regular) ||
                                (secondaryUsage.second == EngineUsage::highPriority);
    UNRECOVERABLE_IF(!usageSupported);

    secondaryEnginesForType.engines.push_back(primaryEngine);

    // Slot 0 is the primary engine, so secondary engines start at index 1.
    for (uint32_t contextIndex = 1; contextIndex < contextCount; ++contextIndex) {
        const bool inHighPriorityRange = contextIndex >= firstHighPriorityIndex;
        if (inHighPriorityRange) {
            // Once switched, the usage stays highPriority for all remaining contexts.
            secondaryUsage.second = EngineUsage::highPriority;
        }
        this->createSecondaryEngine(primaryEngine.commandStreamReceiver, secondaryUsage);
    }

    primaryEngine.osContext->setContextGroup(true);
};
for (auto engineGroupType : {EngineGroupType::compute, EngineGroupType::copy, EngineGroupType::linkedCopy}) {
auto engineGroup = tryGetRegularEngineGroup(engineGroupType);
@@ -372,7 +399,7 @@ bool Device::createEngines() {
}
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
auto highPriorityContextCount = std::min(contextCount / 2, 4u);
auto highPriorityContextCount = gfxCoreHelper.getContextGroupHpContextsCount(engineGroupType);
if (debugManager.flags.OverrideNumHighPriorityContexts.get() != -1) {
highPriorityContextCount = static_cast<uint32_t>(debugManager.flags.OverrideNumHighPriorityContexts.get());
@@ -390,27 +417,23 @@ bool Device::createEngines() {
auto primaryEngine = engineGroup->engines[engineIndex];
secondaryEnginesForType.regularEnginesTotal = contextCount - highPriorityContextCount;
secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
secondaryEnginesForType.regularCounter = 0;
secondaryEnginesForType.highPriorityCounter = 0;
secondaryEnginesForType.assignedContextsCounter = 1;
createSecondaryContext(primaryEngine, secondaryEnginesForType, contextCount, contextCount - highPriorityContextCount, highPriorityContextCount);
}
}
NEO::EngineTypeUsage engineTypeUsage;
engineTypeUsage.first = primaryEngine.getEngineType();
auto hpCopyEngine = getHpCopyEngine();
if (hpCopyEngine) {
auto engineType = hpCopyEngine->getEngineType();
if ((static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0) {
secondaryEnginesForType.engines.push_back(primaryEngine);
UNRECOVERABLE_IF(secondaryEngines.find(engineType) != secondaryEngines.end());
auto &secondaryEnginesForType = secondaryEngines[engineType];
for (uint32_t i = 1; i < contextCount; i++) {
engineTypeUsage.second = EngineUsage::regular;
auto primaryEngine = *hpCopyEngine;
if (i >= contextCount - highPriorityContextCount) {
engineTypeUsage.second = EngineUsage::highPriority;
}
createSecondaryEngine(primaryEngine.commandStreamReceiver, engineTypeUsage);
}
auto contextCount = gfxCoreHelper.getContextGroupContextsCount();
primaryEngine.osContext->setContextGroup(true);
createSecondaryContext(primaryEngine, secondaryEnginesForType, contextCount, 0, contextCount);
}
}
}
@@ -468,7 +491,7 @@ bool Device::createEngine(EngineTypeUsage engineTypeUsage) {
primaryEngineTypeAllowed &= (static_cast<uint32_t>(debugManager.flags.SecondaryContextEngineTypeMask.get()) & (1 << static_cast<uint32_t>(engineType))) != 0;
}
const bool isPrimaryEngine = primaryEngineTypeAllowed && (engineUsage == EngineUsage::regular);
const bool isPrimaryEngine = primaryEngineTypeAllowed && (engineUsage == EngineUsage::regular || engineUsage == EngineUsage::highPriority);
const bool useContextGroup = isPrimaryEngine && gfxCoreHelper.areSecondaryContextsSupported();
UNRECOVERABLE_IF(EngineHelpers::isBcs(engineType) && !hwInfo.capabilityTable.blitterOperationsSupported);
@@ -881,6 +904,19 @@ EngineControl *Device::getInternalCopyEngine() {
return nullptr;
}
// Finds the high-priority copy engine of this device.
// Returns a pointer to the first entry of allEngines whose OS context reports a BCS
// engine type and is marked high priority. Returns nullptr when blitter operations
// are not supported by the hardware or when no such engine exists.
EngineControl *Device::getHpCopyEngine() {
    const bool blitterSupported = getHardwareInfo().capabilityTable.blitterOperationsSupported;
    if (blitterSupported) {
        for (auto &candidate : allEngines) {
            const bool isCopyEngine = NEO::EngineHelpers::isBcs(candidate.osContext->getEngineType());
            if (!isCopyEngine) {
                continue;
            }
            if (candidate.osContext->isHighPriority()) {
                return &candidate;
            }
        }
    }
    return nullptr;
}
RTDispatchGlobalsInfo *Device::getRTDispatchGlobals(uint32_t maxBvhLevels) {
if (rtDispatchGlobalsInfos.size() == 0) {
return nullptr;
@@ -1204,7 +1240,9 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
std::lock_guard<std::mutex> guard(mutex);
if (usage == EngineUsage::highPriority) {
if (highPriorityEnginesTotal == 0) {
return nullptr;
}
// Use index from reserved HP pool
if (hpIndices.size() < highPriorityEnginesTotal) {
secondaryEngineIndex = (highPriorityCounter.fetch_add(1)) % (highPriorityEnginesTotal);
@@ -1228,6 +1266,9 @@ EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
}
} else if (usage == EngineUsage::regular) {
if (regularEnginesTotal == 0) {
return nullptr;
}
if (npIndices.size() == 0) {
regularCounter.fetch_add(1);
npIndices.push_back(secondaryEngineIndex);

View File

@@ -118,6 +118,7 @@ class Device : public ReferenceTrackedObject<Device> {
EngineControl &getNextEngineForCommandQueue();
EngineControl &getInternalEngine();
EngineControl *getInternalCopyEngine();
EngineControl *getHpCopyEngine();
SelectorCopyEngine &getSelectorCopyEngine();
MemoryManager *getMemoryManager() const;
GmmHelper *getGmmHelper() const;

View File

@@ -256,12 +256,21 @@ aub_stream::EngineType selectLinkCopyEngine(const RootDeviceEnvironment &rootDev
: aub_stream::ENGINE_BCS4;
const aub_stream::EngineType engine2 = aub_stream::ENGINE_BCS2;
if (isBcsEnabled(hwInfo, engine1) && isBcsEnabled(hwInfo, engine2)) {
auto hpEngine = gfxCoreHelper.getDefaultHpCopyEngine(hwInfo);
if (isBcsEnabled(hwInfo, engine1) && engine1 != hpEngine &&
isBcsEnabled(hwInfo, engine2) && engine2 != hpEngine) {
// both BCS enabled, round robin
return selectorCopyEngine.fetch_xor(1u) ? engine1 : engine2;
} else {
// one BCS enabled
return isBcsEnabled(hwInfo, engine1) ? engine1 : engine2;
if (isBcsEnabled(hwInfo, engine1) && (engine1 != hpEngine)) {
return engine1;
} else if (isBcsEnabled(hwInfo, engine2) && (engine2 != hpEngine)) {
return engine2;
} else {
return productHelper.getDefaultCopyEngine();
}
}
}
aub_stream::EngineType mapCcsIndexToEngineType(uint32_t index) {

View File

@@ -182,6 +182,8 @@ class GfxCoreHelper {
virtual bool areSecondaryContextsSupported() const = 0;
virtual uint32_t getContextGroupContextsCount() const = 0;
virtual uint32_t getContextGroupHpContextsCount(EngineGroupType type) const = 0;
virtual aub_stream::EngineType getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const = 0;
virtual bool is48ResourceNeededForCmdBuffer() const = 0;
virtual uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const = 0;
@@ -418,6 +420,8 @@ class GfxCoreHelperHw : public GfxCoreHelper {
bool areSecondaryContextsSupported() const override;
uint32_t getContextGroupContextsCount() const override;
uint32_t getContextGroupHpContextsCount(EngineGroupType type) const override;
aub_stream::EngineType getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const override;
bool is48ResourceNeededForCmdBuffer() const override;

View File

@@ -748,6 +748,17 @@ uint32_t GfxCoreHelperHw<GfxFamily>::getContextGroupContextsCount() const {
return 0;
}
// Generic number of high-priority contexts in a context group: half of the group's
// context count, capped at 4. The engine group type argument is not consulted here.
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getContextGroupHpContextsCount(EngineGroupType type) const {
    const auto halfOfGroup = getContextGroupContextsCount() / 2;
    return std::min(halfOfGroup, 4u);
}
// Generic implementation: always reports aub_stream::EngineType::NUM_ENGINES and
// ignores hwInfo.
// NOTE(review): NUM_ENGINES presumably acts as a "no dedicated HP copy engine"
// marker for callers that compare against real engine types - confirm.
template <typename GfxFamily>
aub_stream::EngineType GfxCoreHelperHw<GfxFamily>::getDefaultHpCopyEngine(const HardwareInfo &hwInfo) const {
    const auto defaultHpCopyEngine = aub_stream::EngineType::NUM_ENGINES;
    return defaultHpCopyEngine;
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::is48ResourceNeededForCmdBuffer() const {
return true;