mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 12:23:05 +08:00
Capability to create multiple Regular BCS contexts per engine.
Related-To: NEO-7618 Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
286c672ef4
commit
5a5596957a
@@ -1416,8 +1416,12 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr
|
|||||||
|
|
||||||
auto &osContext = (*csr)->getOsContext();
|
auto &osContext = (*csr)->getOsContext();
|
||||||
|
|
||||||
if (neoDevice->getNumberOfRegularContextsPerEngine() > 1 && !osContext.isRootDevice() && NEO::EngineHelpers::isCcs(osContext.getEngineType())) {
|
if (neoDevice->getNumberOfRegularContextsPerEngine() > 1 && !osContext.isRootDevice()) {
|
||||||
*csr = neoDevice->getNextEngineForMultiRegularContextMode().commandStreamReceiver;
|
if (NEO::EngineHelpers::isCcs(osContext.getEngineType())) {
|
||||||
|
*csr = neoDevice->getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_CCS).commandStreamReceiver;
|
||||||
|
} else if (osContext.getEngineType() == aub_stream::EngineType::ENGINE_BCS) {
|
||||||
|
*csr = neoDevice->getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_BCS).commandStreamReceiver;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
auto subDeviceOrdinal = ordinal - numEngineGroups;
|
auto subDeviceOrdinal = ordinal - numEngineGroups;
|
||||||
|
|||||||
@@ -944,31 +944,52 @@ TEST_F(DeviceCreateCommandQueueTest, givenNormalPriorityDescWhenCreateCommandQue
|
|||||||
struct CommandQueueCreateWithMultipleRegularContextsTests : public DeviceCreateCommandQueueTest {
|
struct CommandQueueCreateWithMultipleRegularContextsTests : public DeviceCreateCommandQueueTest {
|
||||||
void SetUp() override {
|
void SetUp() override {
|
||||||
DebugManager.flags.NumberOfRegularContextsPerEngine.set(numberOfRegularContextsPerEngine);
|
DebugManager.flags.NumberOfRegularContextsPerEngine.set(numberOfRegularContextsPerEngine);
|
||||||
|
DebugManager.flags.EnableMultipleRegularContextForBcs.set(1);
|
||||||
DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCS));
|
DebugManager.flags.NodeOrdinal.set(static_cast<int32_t>(aub_stream::EngineType::ENGINE_CCS));
|
||||||
|
|
||||||
backupHwInfo = std::make_unique<VariableBackup<HardwareInfo>>(defaultHwInfo.get());
|
backupHwInfo = std::make_unique<VariableBackup<HardwareInfo>>(defaultHwInfo.get());
|
||||||
|
defaultHwInfo->capabilityTable.blitterOperationsSupported = true;
|
||||||
defaultHwInfo->featureTable.flags.ftrCCSNode = true;
|
defaultHwInfo->featureTable.flags.ftrCCSNode = true;
|
||||||
|
|
||||||
DeviceCreateCommandQueueTest::SetUp();
|
DeviceCreateCommandQueueTest::SetUp();
|
||||||
|
|
||||||
if (device->getHwInfo().gtSystemInfo.CCSInfo.NumberOfCCSEnabled == 0) {
|
uint32_t regularCcsCount = 0;
|
||||||
|
uint32_t regularBcsCount = 0;
|
||||||
|
|
||||||
|
for (auto &engine : device->getNEODevice()->getAllEngines()) {
|
||||||
|
if (engine.getEngineUsage() == EngineUsage::Regular) {
|
||||||
|
if (engine.getEngineType() == aub_stream::EngineType::ENGINE_CCS) {
|
||||||
|
regularCcsCount++;
|
||||||
|
} else if (engine.getEngineType() == aub_stream::EngineType::ENGINE_BCS) {
|
||||||
|
regularBcsCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regularCcsCount < numberOfRegularContextsPerEngine - 1 || regularBcsCount < numberOfRegularContextsPerEngine - 1) {
|
||||||
GTEST_SKIP();
|
GTEST_SKIP();
|
||||||
}
|
}
|
||||||
|
|
||||||
auto &engineGroups = device->getNEODevice()->getRegularEngineGroups();
|
auto &engineGroups = device->getNEODevice()->getRegularEngineGroups();
|
||||||
|
|
||||||
for (uint32_t i = 0; i < engineGroups.size(); i++) {
|
for (uint32_t i = 0; i < engineGroups.size(); i++) {
|
||||||
if (engineGroups[i].engineGroupType == EngineGroupType::Compute) {
|
if (engineGroups[i].engineGroupType == EngineGroupType::Compute && !computeOrdinalSet) {
|
||||||
computeOrdinal = i;
|
computeOrdinal = i;
|
||||||
break;
|
computeOrdinalSet = true;
|
||||||
|
} else if (engineGroups[i].engineGroupType == EngineGroupType::Copy && !copyOrdinalSet) {
|
||||||
|
copyOrdinal = i;
|
||||||
|
copyOrdinalSet = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<VariableBackup<HardwareInfo>> backupHwInfo;
|
std::unique_ptr<VariableBackup<HardwareInfo>> backupHwInfo;
|
||||||
|
DebugManagerStateRestore restore;
|
||||||
const uint32_t numberOfRegularContextsPerEngine = 5;
|
const uint32_t numberOfRegularContextsPerEngine = 5;
|
||||||
uint32_t computeOrdinal = 0;
|
uint32_t computeOrdinal = 0;
|
||||||
DebugManagerStateRestore restore;
|
uint32_t copyOrdinal = 0;
|
||||||
|
bool computeOrdinalSet = false;
|
||||||
|
bool copyOrdinalSet = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenSupportedRequestWhenCreatingCommandQueueThenAssignNextAvailableContext) {
|
HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenSupportedRequestWhenCreatingCommandQueueThenAssignNextAvailableContext) {
|
||||||
@@ -989,6 +1010,25 @@ HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenSupportedReque
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Requests a CSR for the copy ordinal several times in a row and checks that the returned
// receivers rotate through consecutive entries of the engine list, starting at the device's
// default BCS engine index and wrapping after (numberOfRegularContextsPerEngine - 1) entries.
HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenSupportedRequestWhenCreatingBcsCommandQueueThenAssignNextAvailableContext) {
    constexpr uint32_t iterationCount = 3;

    const uint32_t firstBcsIndex = static_cast<MockDevice *>(device->getNEODevice())->defaultBcsEngineIndex;
    const uint32_t contextsInRotation = numberOfRegularContextsPerEngine - 1;
    const uint32_t requestCount = numberOfRegularContextsPerEngine * iterationCount;

    for (uint32_t request = 0; request < requestCount; request++) {
        NEO::CommandStreamReceiver *selectedCsr = nullptr;
        device->getCsrForOrdinalAndIndex(&selectedCsr, copyOrdinal, 0u);
        ASSERT_NE(nullptr, selectedCsr);

        // Position within the rotation is the request number modulo the rotation length.
        const uint32_t expectedIndex = firstBcsIndex + (request % contextsInRotation);

        EXPECT_EQ(selectedCsr, device->getNEODevice()->getAllEngines()[expectedIndex].commandStreamReceiver);
    }
}
|
||||||
|
|
||||||
TEST_F(DeviceCreateCommandQueueTest,
|
TEST_F(DeviceCreateCommandQueueTest,
|
||||||
whenCallingGetCsrForOrdinalAndIndexWithInvalidOrdinalThenInvalidArgumentIsReturned) {
|
whenCallingGetCsrForOrdinalAndIndexWithInvalidOrdinalThenInvalidArgumentIsReturned) {
|
||||||
ze_command_queue_desc_t desc{};
|
ze_command_queue_desc_t desc{};
|
||||||
|
|||||||
@@ -179,7 +179,7 @@ void CommandQueue::initializeGpgpu() const {
|
|||||||
engineRoundRobinAvailable;
|
engineRoundRobinAvailable;
|
||||||
|
|
||||||
if (device->getDevice().getNumberOfRegularContextsPerEngine() > 1) {
|
if (device->getDevice().getNumberOfRegularContextsPerEngine() > 1) {
|
||||||
this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode();
|
this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_CCS);
|
||||||
} else if (assignEngineRoundRobin) {
|
} else if (assignEngineRoundRobin) {
|
||||||
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue();
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -225,6 +225,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, AdjustThreadGroupDispatchSize, -1, "-1: default,
|
|||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonblockingExecbufferCalls, -1, "-1: default, 0: make execbuffer call blocking, 1: make execbuffer call nonblocking. Supported only in prelim i915 kernels.")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceNonblockingExecbufferCalls, -1, "-1: default, 0: make execbuffer call blocking, 1: make execbuffer call nonblocking. Supported only in prelim i915 kernels.")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: default, 0: disable 1: Enable all flushing bits in ComputeWalker->PostSync")
|
DECLARE_DEBUG_VARIABLE(int32_t, ForceComputeWalkerPostSyncFlush, -1, "-1: default, 0: disable 1: Enable all flushing bits in ComputeWalker->PostSync")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfRegularContextsPerEngine, -1, "-1: default, >0: Create more than 1 Regular contexts for the same engine")
|
DECLARE_DEBUG_VARIABLE(int32_t, NumberOfRegularContextsPerEngine, -1, "-1: default, >0: Create more than 1 Regular contexts for the same engine")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableMultipleRegularContextForBcs, -1, "-1: default, 0: disabled, 1: Use NumberOfRegularContextsPerEngine to create multiple Regular contexts on the same engine")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, AppendAubStreamContextFlags, -1, "-1: default, >0: Append flags passed during HardwareContext creation.")
|
DECLARE_DEBUG_VARIABLE(int32_t, AppendAubStreamContextFlags, -1, "-1: default, >0: Append flags passed during HardwareContext creation.")
|
||||||
|
|
||||||
/*LOGGING FLAGS*/
|
/*LOGGING FLAGS*/
|
||||||
|
|||||||
@@ -396,6 +396,10 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (EngineHelpers::isBcs(engineType) && (defaultBcsEngineIndex == std::numeric_limits<uint32_t>::max()) && (engineUsage == EngineUsage::Regular)) {
|
||||||
|
defaultBcsEngineIndex = deviceCsrIndex;
|
||||||
|
}
|
||||||
|
|
||||||
if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) {
|
if (preemptionMode == PreemptionMode::MidThread && !commandStreamReceiver->createPreemptionAllocation()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -926,12 +930,23 @@ BuiltIns *Device::getBuiltIns() const {
|
|||||||
return executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getBuiltIns();
|
return executionEnvironment->rootDeviceEnvironments[getRootDeviceIndex()]->getBuiltIns();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the next Regular context to use for the requested engine type when several Regular
// contexts are created per engine.
//
// Successive calls cycle over (numberOfRegularContextsPerEngine - 1) consecutive entries of
// allEngines, starting at defaultEngineIndex for ENGINE_CCS and at defaultBcsEngineIndex for
// ENGINE_BCS. Each engine type advances its own atomic assignment counter.
//
// Unrecoverable when:
//  - defaultEngineIndex is not 0,
//  - engineType is neither ENGINE_CCS nor ENGINE_BCS,
//  - fewer than 2 Regular contexts per engine are configured (the rotation length would be 0),
//  - ENGINE_BCS is requested but no default BCS engine index has been recorded.
//
// NOTE(review): both assignment counters are declared as std::atomic<uint8_t>, so they wrap at
// 256; when the rotation length does not divide 256 the sequence restarts early at the wrap.
// Confirm whether that is acceptable or the counters should be widened.
EngineControl &Device::getNextEngineForMultiRegularContextMode(aub_stream::EngineType engineType) {
    UNRECOVERABLE_IF(defaultEngineIndex != 0);
    UNRECOVERABLE_IF((engineType != aub_stream::EngineType::ENGINE_BCS) && (engineType != aub_stream::EngineType::ENGINE_CCS));
    // maxIndex is used as a modulo divisor below; guard against a zero divisor.
    UNRECOVERABLE_IF(numberOfRegularContextsPerEngine < 2);

    const auto maxIndex = numberOfRegularContextsPerEngine - 1; // 1 for internal engine
    uint32_t atomicOutValue = 0;
    uint32_t indexOffset = 0;

    if (engineType == aub_stream::EngineType::ENGINE_CCS) {
        atomicOutValue = regularContextPerCcsEngineAssignmentHelper.fetch_add(1);
        indexOffset = defaultEngineIndex;
    } else {
        // defaultBcsEngineIndex keeps its max() sentinel until a Regular BCS engine is created;
        // adding it to the rotation offset would wrap and select an unrelated or invalid slot.
        UNRECOVERABLE_IF(defaultBcsEngineIndex == std::numeric_limits<uint32_t>::max());

        atomicOutValue = regularContextPerBcsEngineAssignmentHelper.fetch_add(1);
        indexOffset = defaultBcsEngineIndex;
    }

    const auto indexToAssign = (atomicOutValue % maxIndex) + indexOffset;

    return allEngines[indexToAssign];
}
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ class Device : public ReferenceTrackedObject<Device> {
|
|||||||
EngineControl &getEngine(uint32_t index);
|
EngineControl &getEngine(uint32_t index);
|
||||||
EngineControl &getDefaultEngine();
|
EngineControl &getDefaultEngine();
|
||||||
EngineControl &getNextEngineForCommandQueue();
|
EngineControl &getNextEngineForCommandQueue();
|
||||||
EngineControl &getNextEngineForMultiRegularContextMode();
|
EngineControl &getNextEngineForMultiRegularContextMode(aub_stream::EngineType engineType);
|
||||||
EngineControl &getInternalEngine();
|
EngineControl &getInternalEngine();
|
||||||
EngineControl *getInternalCopyEngine();
|
EngineControl *getInternalCopyEngine();
|
||||||
SelectorCopyEngine &getSelectorCopyEngine();
|
SelectorCopyEngine &getSelectorCopyEngine();
|
||||||
@@ -203,9 +203,11 @@ class Device : public ReferenceTrackedObject<Device> {
|
|||||||
ExecutionEnvironment *executionEnvironment = nullptr;
|
ExecutionEnvironment *executionEnvironment = nullptr;
|
||||||
aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES;
|
aub_stream::EngineType engineInstancedType = aub_stream::EngineType::NUM_ENGINES;
|
||||||
uint32_t defaultEngineIndex = 0;
|
uint32_t defaultEngineIndex = 0;
|
||||||
|
uint32_t defaultBcsEngineIndex = std::numeric_limits<uint32_t>::max();
|
||||||
uint32_t numSubDevices = 0;
|
uint32_t numSubDevices = 0;
|
||||||
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
std::atomic_uint32_t regularCommandQueuesCreatedWithinDeviceCount{0};
|
||||||
std::atomic<uint8_t> regularContextPerEngineAssignmentHelper = 0;
|
std::atomic<uint8_t> regularContextPerCcsEngineAssignmentHelper = 0;
|
||||||
|
std::atomic<uint8_t> regularContextPerBcsEngineAssignmentHelper = 0;
|
||||||
std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
|
std::bitset<8> availableEnginesForCommandQueueusRoundRobin = 0;
|
||||||
uint32_t queuesPerEngineCount = 1;
|
uint32_t queuesPerEngineCount = 1;
|
||||||
uint32_t numberOfRegularContextsPerEngine = 1;
|
uint32_t numberOfRegularContextsPerEngine = 1;
|
||||||
|
|||||||
@@ -52,6 +52,7 @@ class MockDevice : public RootDevice {
|
|||||||
using Device::createDeviceInternals;
|
using Device::createDeviceInternals;
|
||||||
using Device::createEngine;
|
using Device::createEngine;
|
||||||
using Device::createSubDevices;
|
using Device::createSubDevices;
|
||||||
|
using Device::defaultBcsEngineIndex;
|
||||||
using Device::deviceBitfield;
|
using Device::deviceBitfield;
|
||||||
using Device::deviceInfo;
|
using Device::deviceInfo;
|
||||||
using Device::engineInstanced;
|
using Device::engineInstanced;
|
||||||
|
|||||||
@@ -495,4 +495,5 @@ OverrideUserFenceStartValue = -1
|
|||||||
DirectSubmissionRelaxedOrderingQueueSizeLimit = -1
|
DirectSubmissionRelaxedOrderingQueueSizeLimit = -1
|
||||||
ExperimentalForceCopyThroughLock = -1
|
ExperimentalForceCopyThroughLock = -1
|
||||||
NumberOfRegularContextsPerEngine = -1
|
NumberOfRegularContextsPerEngine = -1
|
||||||
|
EnableMultipleRegularContextForBcs = -1
|
||||||
AppendAubStreamContextFlags = -1
|
AppendAubStreamContextFlags = -1
|
||||||
@@ -715,3 +715,20 @@ TEST(FailDeviceTest, GivenMidThreadPreemptionAndFailedDeviceWhenCreatingDeviceTh
|
|||||||
|
|
||||||
EXPECT_EQ(nullptr, pDevice);
|
EXPECT_EQ(nullptr, pDevice);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a root device and checks that the engine stored at defaultBcsEngineIndex is an
// ENGINE_BCS engine with Regular usage. Skipped when blitter operations are not supported.
TEST_F(DeviceTests, whenInitializingDeviceThenSetCorrectDefaultBcsEngineIndex) {
    const bool blitterSupported = defaultHwInfo->capabilityTable.blitterOperationsSupported;
    if (!blitterSupported) {
        GTEST_SKIP();
    }

    MockExecutionEnvironment executionEnvironment(defaultHwInfo.get());
    executionEnvironment.incRefInternal();

    UltDeviceFactory deviceFactory{1, 0, executionEnvironment};

    auto rootDevice = deviceFactory.rootDevices[0];
    const auto bcsIndex = rootDevice->defaultBcsEngineIndex;
    auto &defaultBcsEngine = rootDevice->allEngines[bcsIndex];

    EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS, defaultBcsEngine.getEngineType());
    EXPECT_EQ(EngineUsage::Regular, defaultBcsEngine.getEngineUsage());
}
|
||||||
|
|||||||
Reference in New Issue
Block a user