mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Unify multi regular context selection
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
Related-To: NEO-7618
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
026d50c7b9
commit
d42ec1ad8a
@@ -93,6 +93,9 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
commandQueueProperties = getCmdQueueProperties<cl_command_queue_properties>(properties);
|
||||
flushStamp.reset(new FlushStampTracker(true));
|
||||
|
||||
storeProperties(properties);
|
||||
processProperties(properties);
|
||||
|
||||
if (device) {
|
||||
auto &hwInfo = device->getHardwareInfo();
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
@@ -122,9 +125,6 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr
|
||||
device->getDevice().getL0Debugger()->notifyCommandQueueCreated(&device->getDevice());
|
||||
}
|
||||
}
|
||||
|
||||
storeProperties(properties);
|
||||
processProperties(properties);
|
||||
}
|
||||
|
||||
CommandQueue::~CommandQueue() {
|
||||
@@ -181,8 +181,10 @@ void CommandQueue::initializeGpgpu() const {
|
||||
!(getCmdQueueProperties<cl_queue_priority_khr>(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & static_cast<cl_queue_priority_khr>(CL_QUEUE_PRIORITY_LOW_KHR)) &&
|
||||
engineRoundRobinAvailable;
|
||||
|
||||
if (device->getDevice().getNumberOfRegularContextsPerEngine() > 1) {
|
||||
this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_CCS);
|
||||
auto defaultEngineType = device->getDefaultEngine().getEngineType();
|
||||
|
||||
if (device->getDevice().isMultiRegularContextSelectionAllowed(defaultEngineType, EngineUsage::Regular)) {
|
||||
this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(defaultEngineType);
|
||||
} else if (assignEngineRoundRobin) {
|
||||
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
||||
} else {
|
||||
@@ -325,7 +327,13 @@ void CommandQueue::constructBcsEngine(bool internalUsage) {
|
||||
auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getRootDeviceEnvironment(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage);
|
||||
auto bcsIndex = EngineHelpers::getBcsIndex(bcsEngineType);
|
||||
auto engineUsage = (internalUsage && gfxCoreHelper.preferInternalBcsEngine()) ? EngineUsage::Internal : EngineUsage::Regular;
|
||||
bcsEngines[bcsIndex] = neoDevice.tryGetEngine(bcsEngineType, engineUsage);
|
||||
|
||||
if (neoDevice.isMultiRegularContextSelectionAllowed(bcsEngineType, engineUsage)) {
|
||||
bcsEngines[bcsIndex] = &neoDevice.getNextEngineForMultiRegularContextMode(bcsEngineType);
|
||||
} else {
|
||||
bcsEngines[bcsIndex] = neoDevice.tryGetEngine(bcsEngineType, engineUsage);
|
||||
}
|
||||
|
||||
bcsEngineTypes.push_back(bcsEngineType);
|
||||
bcsInitialized = true;
|
||||
if (bcsEngines[bcsIndex]) {
|
||||
@@ -1157,16 +1165,28 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage
|
||||
const EngineGroupType engineGroupType = gfxCoreHelper.getEngineGroupType(engineType, engineUsage, hwInfo);
|
||||
const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType);
|
||||
|
||||
bool multiRegularContextAllowed = device->getDevice().isMultiRegularContextSelectionAllowed(engineType, engineUsage);
|
||||
|
||||
if (isEngineCopyOnly) {
|
||||
std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr);
|
||||
bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular);
|
||||
auto engineIndex = EngineHelpers::getBcsIndex(engineType);
|
||||
|
||||
if (multiRegularContextAllowed) {
|
||||
bcsEngines[engineIndex] = &device->getDevice().getNextEngineForMultiRegularContextMode(engineType);
|
||||
} else {
|
||||
bcsEngines[engineIndex] = &device->getEngine(engineType, EngineUsage::Regular);
|
||||
}
|
||||
bcsEngineTypes = {engineType};
|
||||
timestampPacketContainer = std::make_unique<TimestampPacketContainer>();
|
||||
deferredTimestampPackets = std::make_unique<TimestampPacketContainer>();
|
||||
isCopyOnly = true;
|
||||
bcsInitialized = true;
|
||||
} else {
|
||||
gpgpuEngine = &device->getEngine(engineType, engineUsage);
|
||||
if (multiRegularContextAllowed) {
|
||||
gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(engineType);
|
||||
} else {
|
||||
gpgpuEngine = &device->getEngine(engineType, engineUsage);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -2763,48 +2763,147 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenC
|
||||
EXPECT_TRUE(osContext.isInitialized());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandQueueOnSpecificEngineTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularContext) {
|
||||
DebugManagerStateRestore restore{};
|
||||
DebugManager.flags.NumberOfRegularContextsPerEngine.set(4);
|
||||
DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(aub_stream::ENGINE_CCS));
|
||||
struct CommandQueueCreateWithMultipleRegularContextsTests : public CommandQueueOnSpecificEngineTests {
|
||||
void SetUp() override {
|
||||
DebugManager.flags.NumberOfRegularContextsPerEngine.set(numberOfRegularContextsPerEngine);
|
||||
DebugManager.flags.EnableMultipleRegularContextForBcs.set(1);
|
||||
DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCS));
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
backupHwInfo = std::make_unique<VariableBackup<HardwareInfo>>(defaultHwInfo.get());
|
||||
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||
defaultHwInfo->featureTable.flags.ftrCCSNode = true;
|
||||
|
||||
class MyMockGfxCoreHelper : public GfxCoreHelperHw<FamilyType> {
|
||||
public:
|
||||
const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override {
|
||||
EngineInstancesContainer result{};
|
||||
CommandQueueOnSpecificEngineTests::SetUp();
|
||||
|
||||
result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular});
|
||||
result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular});
|
||||
result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular});
|
||||
result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Internal});
|
||||
context = std::make_unique<MockContext>();
|
||||
|
||||
return result;
|
||||
uint32_t regularCcsCount = 0;
|
||||
uint32_t regularBcsCount = 0;
|
||||
|
||||
device = static_cast<MockDevice *>(&context->getDevice(0)->getDevice());
|
||||
|
||||
for (auto &engine : device->getAllEngines()) {
|
||||
if (engine.getEngineUsage() == EngineUsage::Regular) {
|
||||
if (engine.getEngineType() == aub_stream::EngineType::ENGINE_CCS) {
|
||||
regularCcsCount++;
|
||||
} else if (engine.getEngineType() == aub_stream::EngineType::ENGINE_BCS) {
|
||||
regularBcsCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override {
|
||||
return EngineGroupType::Compute;
|
||||
if (regularCcsCount < numberOfRegularContextsPerEngine - 1 || regularBcsCount < numberOfRegularContextsPerEngine - 1) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
};
|
||||
|
||||
auto raiiGfxCoreHelper = overrideGfxCoreHelper<FamilyType, MyMockGfxCoreHelper>(*mockExecutionEnvironment.rootDeviceEnvironments[0]);
|
||||
device->regularContextPerBcsEngineAssignmentHelper = 0;
|
||||
|
||||
MockContext context{};
|
||||
auto &device = static_cast<MockDevice &>(context.getDevice(0)->getDevice());
|
||||
EXPECT_EQ(0u, device.defaultEngineIndex);
|
||||
auto &engineGroups = device->getAllEngines();
|
||||
|
||||
uint32_t expectedIndex = 0;
|
||||
for (uint32_t i = 0; i < engineGroups.size(); i++) {
|
||||
if (engineGroups[i].getEngineType() == aub_stream::EngineType::ENGINE_CCS && !computeOrdinalSet) {
|
||||
computeOrdinal = i;
|
||||
computeOrdinalSet = true;
|
||||
} else if (engineGroups[i].getEngineType() == aub_stream::EngineType::ENGINE_BCS && !copyOrdinalSet) {
|
||||
copyOrdinal = i;
|
||||
copyOrdinalSet = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < 8; i++) {
|
||||
MockCommandQueueHw<FamilyType> queue(&context, context.getDevice(0), nullptr);
|
||||
std::unique_ptr<VariableBackup<HardwareInfo>> backupHwInfo;
|
||||
DebugManagerStateRestore restore;
|
||||
std::unique_ptr<MockContext> context;
|
||||
MockDevice *device = nullptr;
|
||||
const uint32_t numberOfRegularContextsPerEngine = 5;
|
||||
uint32_t computeOrdinal = 0;
|
||||
uint32_t copyOrdinal = 0;
|
||||
bool computeOrdinalSet = false;
|
||||
bool copyOrdinalSet = false;
|
||||
};
|
||||
|
||||
HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularCcsContext) {
|
||||
constexpr uint32_t iterationCount = 3;
|
||||
|
||||
uint32_t expectedIndex = computeOrdinal;
|
||||
|
||||
// Default queue
|
||||
for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) {
|
||||
MockCommandQueueHw<FamilyType> queue(context.get(), context->getDevice(0), nullptr);
|
||||
queue.initializeGpgpu();
|
||||
|
||||
EXPECT_EQ(queue.gpgpuEngine, &device.allEngines[expectedIndex]);
|
||||
EXPECT_EQ(queue.gpgpuEngine, &device->allEngines[expectedIndex]);
|
||||
|
||||
expectedIndex++;
|
||||
if (expectedIndex == 3) {
|
||||
expectedIndex = 0;
|
||||
if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + computeOrdinal) {
|
||||
expectedIndex = computeOrdinal;
|
||||
}
|
||||
}
|
||||
|
||||
expectedIndex = computeOrdinal;
|
||||
device->regularContextPerCcsEngineAssignmentHelper = 0;
|
||||
|
||||
cl_queue_properties queueProperties[] = {
|
||||
CL_QUEUE_FAMILY_INTEL,
|
||||
device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute),
|
||||
CL_QUEUE_INDEX_INTEL,
|
||||
0,
|
||||
0,
|
||||
};
|
||||
|
||||
// Explicit selection
|
||||
for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) {
|
||||
MockCommandQueueHw<FamilyType> queue(context.get(), context->getDevice(0), queueProperties);
|
||||
|
||||
EXPECT_EQ(queue.gpgpuEngine, &device->allEngines[expectedIndex]);
|
||||
|
||||
expectedIndex++;
|
||||
if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + computeOrdinal) {
|
||||
expectedIndex = computeOrdinal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that consecutively created command queues are handed consecutive
// entries of device->allEngines as their bcsEngines[0], starting at
// copyOrdinal and wrapping back to copyOrdinal once the index reaches
// (numberOfRegularContextsPerEngine - 1) + copyOrdinal.
// The check runs twice: first with queues created without properties, then
// with queues that explicitly select the Copy engine group.
HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularBcsContext) {
|
||||
// Overrides the NodeOrdinal debug flag for this test.
// NOTE(review): -1 presumably means "no forced engine" - confirm against the flag's definition.
DebugManager.flags.NodeOrdinal.set(-1);
|
||||
|
||||
// Each phase creates numberOfRegularContextsPerEngine * iterationCount queues,
// so the expected index wraps around several times.
constexpr uint32_t iterationCount = 3;
|
||||
|
||||
// Index into device->allEngines that the next queue is expected to receive.
uint32_t expectedIndex = copyOrdinal;
|
||||
|
||||
// Default queue
// (created with a null properties pointer)
for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) {
|
||||
MockCommandQueueHw<FamilyType> queue(context.get(), context->getDevice(0), nullptr);
|
||||
|
||||
EXPECT_EQ(queue.bcsEngines[0], &device->allEngines[expectedIndex]);
|
||||
|
||||
// Advance the expectation; wrap to the first copy entry when the upper bound is hit.
expectedIndex++;
|
||||
if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + copyOrdinal) {
|
||||
expectedIndex = copyOrdinal;
|
||||
}
|
||||
}
|
||||
|
||||
// Restart the expectation and zero the device's BCS assignment helper before
// the second phase, which is again expected to begin at copyOrdinal.
expectedIndex = copyOrdinal;
|
||||
device->regularContextPerBcsEngineAssignmentHelper = 0;
|
||||
|
||||
// Explicit selection
|
||||
|
||||
// Property list: queue family = index of the Copy engine group, queue index = 0,
// followed by a trailing 0 (presumably the list terminator - confirm).
cl_queue_properties queueProperties[] = {
|
||||
CL_QUEUE_FAMILY_INTEL,
|
||||
device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Copy),
|
||||
CL_QUEUE_INDEX_INTEL,
|
||||
0,
|
||||
0,
|
||||
};
|
||||
|
||||
// Same sequence check as the first loop, but with the explicit properties above.
for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) {
|
||||
MockCommandQueueHw<FamilyType> queue(context.get(), context->getDevice(0), queueProperties);
|
||||
|
||||
EXPECT_EQ(queue.bcsEngines[0], &device->allEngines[expectedIndex]);
|
||||
|
||||
expectedIndex++;
|
||||
if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + copyOrdinal) {
|
||||
expectedIndex = copyOrdinal;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -249,6 +249,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
using BaseClass::latestSentEnqueueType;
|
||||
using BaseClass::obtainCommandStream;
|
||||
using BaseClass::obtainNewTimestampPacketNodes;
|
||||
using BaseClass::overrideEngine;
|
||||
using BaseClass::processDispatchForKernels;
|
||||
using BaseClass::requiresCacheFlushAfterWalker;
|
||||
using BaseClass::throttle;
|
||||
|
||||
Reference in New Issue
Block a user