feature: allow dynamic count of HP contexts in context group

- HP contexts may use up to half of the contexts in a group

Related-To: NEO-7824

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2024-07-09 12:15:56 +00:00 committed by Compute-Runtime-Automation
parent 5d09dd8273
commit 14a1bbfe12
6 changed files with 187 additions and 24 deletions

View File

@ -2031,6 +2031,9 @@ TEST(CommandList, givenContextGroupEnabledWhenCreatingImmediateCommandListWithIn
mockOsContexts.push_back(newOsContext);
newOsContext->incRefInternal();
newOsContext->setIsPrimaryEngine(engine.osContext->getIsPrimaryEngine());
newOsContext->setContextGroup(engine.osContext->isPartOfContextGroup());
engine.osContext = newOsContext;
engine.commandStreamReceiver->setupContext(*newOsContext);
}
@ -2059,9 +2062,7 @@ TEST(CommandList, givenContextGroupEnabledWhenCreatingImmediateCommandListWithIn
auto commandList3 = static_cast<CommandListImp *>(L0::CommandList::fromHandle(commandListHandle3));
EXPECT_TRUE(static_cast<MockOsContext &>(commandList1->getCsr(false)->getOsContext()).allocateInterruptPassed);
EXPECT_FALSE(static_cast<MockOsContext &>(commandList1->getCsr(false)->getOsContext()).isPartOfContextGroup());
EXPECT_TRUE(static_cast<MockOsContext &>(commandList2->getCsr(false)->getOsContext()).allocateInterruptPassed);
EXPECT_FALSE(static_cast<MockOsContext &>(commandList2->getCsr(false)->getOsContext()).isPartOfContextGroup());
EXPECT_FALSE(static_cast<MockOsContext &>(commandList3->getCsr(false)->getOsContext()).allocateInterruptPassed);
commandList1->destroy();

View File

@ -1404,6 +1404,9 @@ TEST(CommandQueue, givenContextGroupEnabledWhenCreatingCommandQueuesWithInterrup
mockOsContexts.push_back(newOsContext);
newOsContext->incRefInternal();
newOsContext->setIsPrimaryEngine(engine.osContext->getIsPrimaryEngine());
newOsContext->setContextGroup(engine.osContext->isPartOfContextGroup());
engine.osContext = newOsContext;
engine.commandStreamReceiver->setupContext(*newOsContext);
}
@ -1432,9 +1435,7 @@ TEST(CommandQueue, givenContextGroupEnabledWhenCreatingCommandQueuesWithInterrup
auto commandQueue3 = static_cast<CommandQueueImp *>(L0::CommandQueue::fromHandle(commandQueueHandle3));
EXPECT_TRUE(static_cast<MockOsContext &>(commandQueue1->getCsr()->getOsContext()).allocateInterruptPassed);
EXPECT_FALSE(static_cast<MockOsContext &>(commandQueue1->getCsr()->getOsContext()).isPartOfContextGroup());
EXPECT_TRUE(static_cast<MockOsContext &>(commandQueue2->getCsr()->getOsContext()).allocateInterruptPassed);
EXPECT_FALSE(static_cast<MockOsContext &>(commandQueue2->getCsr()->getOsContext()).isPartOfContextGroup());
EXPECT_FALSE(static_cast<MockOsContext &>(commandQueue3->getCsr()->getOsContext()).allocateInterruptPassed);
commandQueue1->destroy();

View File

@ -394,6 +394,7 @@ bool Device::createEngines() {
secondaryEnginesForType.highPriorityEnginesTotal = highPriorityContextCount;
secondaryEnginesForType.regularCounter = 0;
secondaryEnginesForType.highPriorityCounter = 0;
secondaryEnginesForType.assignedContextsCounter = 1;
NEO::EngineTypeUsage engineTypeUsage;
engineTypeUsage.first = primaryEngine.getEngineType();
@ -591,24 +592,20 @@ EngineControl *Device::getSecondaryEngineCsr(EngineTypeUsage engineTypeUsage, bo
auto &secondaryEnginesForType = secondaryEngines[engineTypeUsage.first];
auto secondaryEngineIndex = 0;
if (engineTypeUsage.second == EngineUsage::highPriority) {
secondaryEngineIndex = (secondaryEnginesForType.highPriorityCounter.fetch_add(1)) % (secondaryEnginesForType.highPriorityEnginesTotal);
secondaryEngineIndex += secondaryEnginesForType.regularEnginesTotal;
} else if (engineTypeUsage.second == EngineUsage::regular) {
secondaryEngineIndex = (secondaryEnginesForType.regularCounter.fetch_add(1)) % (secondaryEnginesForType.regularEnginesTotal);
auto engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second);
if (secondaryEngineIndex == 0 && allocateInterrupt) {
// Context 0 is already pre-initialized. We need non-initialized context, to pass context creation flag.
// If all contexts are already initialized, just take next available. Interrupt request is only a hint.
secondaryEngineIndex = (secondaryEnginesForType.regularCounter.fetch_add(1)) % (secondaryEnginesForType.regularEnginesTotal);
}
} else {
DEBUG_BREAK_IF(true);
bool isPrimaryContextInGroup = engineControl->osContext->getIsPrimaryEngine() && engineControl->osContext->isPartOfContextGroup();
if (isPrimaryContextInGroup && allocateInterrupt) {
// Context 0 is already pre-initialized. We need non-initialized context, to pass context creation flag.
// If all contexts are already initialized, just take next available. Interrupt request is only a hint.
engineControl = secondaryEnginesForType.getEngine(engineTypeUsage.second);
}
if (secondaryEngineIndex > 0) {
auto commandStreamReceiver = secondaryEnginesForType.engines[secondaryEngineIndex].commandStreamReceiver;
isPrimaryContextInGroup = engineControl->osContext->getIsPrimaryEngine() && engineControl->osContext->isPartOfContextGroup();
if (!isPrimaryContextInGroup) {
auto commandStreamReceiver = engineControl->commandStreamReceiver;
auto lock = commandStreamReceiver->obtainUniqueOwnership();
@ -633,7 +630,7 @@ EngineControl *Device::getSecondaryEngineCsr(EngineTypeUsage engineTypeUsage, bo
}
}
}
return &secondaryEnginesForType.engines[secondaryEngineIndex];
return engineControl;
}
const HardwareInfo &Device::getHardwareInfo() const { return *getRootDeviceEnvironment().getHardwareInfo(); }
@ -1200,4 +1197,57 @@ const EngineGroupT *Device::tryGetRegularEngineGroup(EngineGroupType engineGroup
}
return nullptr;
}
// Picks the secondary EngineControl to hand out for the next context of the requested usage.
//
// The whole selection runs under `mutex`. Indices that were already handed out are remembered
// per usage in `npIndices` (regular) and `hpIndices` (high priority):
//  - regular: the first request gets index 0; later requests take the next unassigned index
//    while `assignedContextsCounter < regularEnginesTotal`, otherwise they rotate over the
//    indices already recorded in `npIndices`.
//  - highPriority: requests first fill the reserved range that starts at `regularEnginesTotal`,
//    then take unassigned indices below `regularEnginesTotal`, and finally rotate over the
//    indices already recorded in `hpIndices`. The OsContext of the chosen engine is switched to
//    EngineUsage::highPriority if it does not report that usage yet.
//  - any other usage: DEBUG_BREAK_IF(true) and the engine at index 0 is returned.
//
// NOTE(review): the high-priority rotation computes `% hpIndices.size()`; if that list could be
// empty there (highPriorityEnginesTotal == 0 and no unassigned index left) this divides by
// zero - confirm callers/configuration rule that out.
EngineControl *SecondaryContexts::getEngine(EngineUsage usage) {
    std::lock_guard<std::mutex> guard(mutex);

    if (usage == EngineUsage::regular) {
        int slot = 0;
        if (npIndices.size() == 0) {
            // Very first regular request: index 0 is used as is.
            regularCounter.fetch_add(1);
            npIndices.push_back(slot);
        } else if (assignedContextsCounter < regularEnginesTotal) {
            // An unassigned index is still available - claim it for regular use.
            slot = assignedContextsCounter.fetch_add(1);
            regularCounter.fetch_add(1);
            npIndices.push_back(slot);
        } else {
            // Nothing left to claim - rotate over the indices handed out before.
            const auto position = (regularCounter.fetch_add(1)) % (npIndices.size());
            slot = npIndices[position];
        }
        return &engines[slot];
    }

    if (usage != EngineUsage::highPriority) {
        DEBUG_BREAK_IF(true);
        return &engines[0];
    }

    int slot = 0;
    if (hpIndices.size() < highPriorityEnginesTotal) {
        // Reserved high-priority range, located right after the regular indices.
        slot = (highPriorityCounter.fetch_add(1)) % (highPriorityEnginesTotal);
        slot += regularEnginesTotal;
        hpIndices.push_back(slot);
    } else if (assignedContextsCounter < regularEnginesTotal) {
        // Reserved range is used up - claim an unassigned index instead.
        slot = assignedContextsCounter.fetch_add(1);
        highPriorityCounter.fetch_add(1);
        hpIndices.push_back(slot);
    } else {
        // Nothing left to claim - rotate over the indices handed out before.
        const auto position = (highPriorityCounter.fetch_add(1)) % (hpIndices.size());
        slot = hpIndices[position];
    }

    auto &selected = engines[slot];
    if (selected.osContext->getEngineUsage() != EngineUsage::highPriority) {
        selected.osContext->overrideEngineUsage(EngineUsage::highPriority);
    }
    return &selected;
}
} // namespace NEO

View File

@ -18,6 +18,7 @@
#include "shared/source/utilities/reference_tracked_object.h"
#include <array>
#include <mutex>
namespace NEO {
class BindlessHeapsHelper;
@ -63,11 +64,18 @@ struct SecondaryContexts {
SecondaryContexts(const SecondaryContexts &in) = delete;
SecondaryContexts &operator=(const SecondaryContexts &) = delete;
EnginesT engines; // vector of secondary EngineControls
std::atomic<uint8_t> regularCounter = 0; // Counter used to assign next regular EngineControl
std::atomic<uint8_t> highPriorityCounter = 0; // Counter used to assign next highPriority EngineControl
EngineControl *getEngine(const EngineUsage usage);
EnginesT engines; // vector of secondary EngineControls
std::atomic<uint8_t> regularCounter = 0; // Counter used to assign next regular EngineControl
std::atomic<uint8_t> highPriorityCounter = 0; // Counter used to assign next highPriority EngineControl
std::atomic<uint8_t> assignedContextsCounter = 0; // Counter of assigned contexts in group
uint32_t regularEnginesTotal;
uint32_t highPriorityEnginesTotal;
std::vector<int32_t> npIndices;
std::vector<int32_t> hpIndices;
std::mutex mutex;
};
struct RTDispatchGlobalsInfo {

View File

@ -34,6 +34,8 @@ class OsContext : public ReferenceTrackedObject<OsContext> {
// Returns the preemption mode stored in this context.
PreemptionMode getPreemptionMode() const { return preemptionMode; }
// Returns the engine type stored in this context.
const aub_stream::EngineType &getEngineType() const { return engineType; }
// Returns the current engine usage. Const-qualified (it only reads a member) so it can be
// called through a const OsContext, like the is*() predicates below.
EngineUsage getEngineUsage() const { return engineUsage; }
// Replaces the stored engine usage; SecondaryContexts::getEngine uses this to mark a context
// it hands out for high-priority work as EngineUsage::highPriority.
void overrideEngineUsage(EngineUsage usage) { engineUsage = usage; }
// Convenience predicates comparing the stored engine usage against a specific value.
bool isRegular() const { return engineUsage == EngineUsage::regular; }
bool isLowPriority() const { return engineUsage == EngineUsage::lowPriority; }
bool isHighPriority() const { return engineUsage == EngineUsage::highPriority; }
@ -110,7 +112,7 @@ class OsContext : public ReferenceTrackedObject<OsContext> {
const PreemptionMode preemptionMode;
const uint32_t numSupportedDevices;
aub_stream::EngineType engineType = aub_stream::ENGINE_RCS;
const EngineUsage engineUsage;
EngineUsage engineUsage;
const bool rootDevice = false;
bool defaultContext = false;
bool directSubmissionActive = false;

View File

@ -1262,6 +1262,107 @@ HWTEST_F(DeviceTests, givenCCSEnginesAndContextGroupSizeEnabledWhenDeviceIsCreat
EXPECT_NE(internalEngine.commandStreamReceiver, device->getSecondaryEngineCsr({aub_stream::EngineType::ENGINE_CCS, EngineUsage::internal}, false)->commandStreamReceiver);
}
// Creates a single-CCS device with ContextGroupSize = 14 and issues a sequence of high-priority
// getSecondaryEngineCsr() requests (with two regular requests interleaved), checking which
// secondary engine each request returns and that every engine returned for a high-priority
// request reports isHighPriority().
HWTEST_F(DeviceTests, givenContextGroupSizeEnabledWhenMoreHpEnginesCreatedThenFreeEnginesAreAssignedUpToHalfOfContextGroup) {
DebugManagerStateRestore dbgRestorer;
const uint32_t contextGroupSize = 14;
debugManager.flags.ContextGroupSize.set(contextGroupSize);
// Hardware setup: one CCS, no BCS, CCS as the default engine.
HardwareInfo hwInfo = *defaultHwInfo;
hwInfo.featureTable.flags.ftrCCSNode = true;
hwInfo.featureTable.ftrBcsInfo = 0;
hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS;
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;
auto device = std::unique_ptr<MockDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto &engineGroups = device->getRegularEngineGroups();
auto engineGroupType = EngineGroupType::compute;
size_t computeEnginesCount = 0;
for (const auto &engine : engineGroups) {
if (engine.engineGroupType == engineGroupType) {
computeEnginesCount = engine.engines.size();
}
}
// Nothing to verify when the device exposes no compute engine group.
if (computeEnginesCount == 0) {
GTEST_SKIP();
}
ASSERT_EQ(computeEnginesCount, device->secondaryEngines.size());
ASSERT_EQ(contextGroupSize, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size());
auto defaultEngine = device->getDefaultEngine();
EXPECT_EQ(defaultEngine.commandStreamReceiver, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines[0].commandStreamReceiver);
// Upper bound of distinct high-priority contexts this test expects: half of the group.
const uint32_t maxHpContextCount = contextGroupSize / 2;
for (uint32_t ccsIndex = 0; ccsIndex < computeEnginesCount; ccsIndex++) {
auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(ccsIndex)];
// Engine 0 is the primary context of the group; all others must point back to it.
EXPECT_TRUE(secondaryEngines.engines[0].osContext->isPartOfContextGroup());
EXPECT_EQ(nullptr, secondaryEngines.engines[0].osContext->getPrimaryContext());
for (size_t i = 1; i < device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size(); i++) {
EXPECT_EQ(secondaryEngines.engines[0].osContext, secondaryEngines.engines[i].osContext->getPrimaryContext());
EXPECT_TRUE(secondaryEngines.engines[i].osContext->isPartOfContextGroup());
}
// No engine has been requested yet.
EXPECT_EQ(0u, secondaryEngines.regularCounter.load());
EXPECT_EQ(0u, secondaryEngines.highPriorityCounter.load());
auto regularContextCount = secondaryEngines.regularEnginesTotal;
EXPECT_EQ(contextGroupSize - regularContextCount, secondaryEngines.highPriorityEnginesTotal);
// npCounter: next engine index expected from the regular range (shared by regular requests and
// by high-priority requests that spill over into it). hpCounter: high-priority requests so far.
uint32_t npCounter = 0;
uint32_t hpCounter = 0;
std::vector<EngineControl *> hpEngines;
// One high-priority request per iteration, maxHpContextCount + 2 in total.
for (size_t contextId = 0; contextId < maxHpContextCount + 2; contextId++) {
// Interleave a first regular request before the third high-priority one.
if (contextId == 2) {
auto engine = device->getSecondaryEngineCsr({EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::regular}, false);
ASSERT_NE(nullptr, engine);
EXPECT_EQ(1, secondaryEngines.regularCounter.load());
EXPECT_EQ(&secondaryEngines.engines[npCounter], engine);
EXPECT_FALSE(secondaryEngines.engines[npCounter].osContext->isHighPriority());
npCounter++;
}
// Interleave a second regular request before the seventh high-priority one.
if (contextId == 6) {
auto engine = device->getSecondaryEngineCsr({EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::regular}, false);
ASSERT_NE(nullptr, engine);
EXPECT_EQ(2, secondaryEngines.regularCounter.load());
EXPECT_EQ(&secondaryEngines.engines[npCounter], engine);
EXPECT_FALSE(secondaryEngines.engines[npCounter].osContext->isHighPriority());
npCounter++;
}
auto engine = device->getSecondaryEngineCsr({EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::highPriority}, false);
ASSERT_NE(nullptr, engine);
hpEngines.push_back(engine);
hpCounter++;
if (contextId < secondaryEngines.highPriorityEnginesTotal) {
// Reserved high-priority engines are expected right after the regular range.
EXPECT_EQ(&secondaryEngines.engines[regularContextCount + hpCounter - 1], engine);
EXPECT_TRUE(secondaryEngines.engines[regularContextCount + hpCounter - 1].osContext->isHighPriority());
} else if (contextId >= secondaryEngines.highPriorityEnginesTotal) {
if (hpCounter <= maxHpContextCount) {
// Reserved engines are used up: the request is expected to take the next engine
// from the regular range, which must then report high priority.
EXPECT_EQ(&secondaryEngines.engines[npCounter], engine);
EXPECT_TRUE(secondaryEngines.engines[npCounter].osContext->isHighPriority());
npCounter++;
} else {
// NOTE(review): `%` binds tighter than `-`, so the index below is
// `hpCounter - (1 % maxHpContextCount)`, i.e. `hpCounter - 1` for maxHpContextCount = 7.
// That is the element pushed just above, so the EXPECT_EQ compares `engine` with itself.
// `(hpCounter - 1) % maxHpContextCount` was presumably intended - confirm against
// SecondaryContexts::getEngine before changing, as it alters which engine is expected.
EXPECT_EQ(hpEngines[hpCounter - 1 % maxHpContextCount], engine);
EXPECT_TRUE(hpEngines[hpCounter - 1 % maxHpContextCount]->osContext->isHighPriority());
}
}
}
}
}
HWTEST_F(DeviceTests, givenDebugFlagSetWhenCreatingSecondaryEnginesThenCreateCorrectNumberOfHighPriorityContexts) {
DebugManagerStateRestore dbgRestorer;
constexpr uint32_t contextGroupSize = 16;