From d42ec1ad8aa23cd4ecb1282c3cbc1f1608a5c8bc Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 26 Jan 2023 11:33:18 +0000 Subject: [PATCH] Unify multi regular context selection Signed-off-by: Dunajski, Bartosz Related-To: NEO-7618 --- level_zero/core/source/device/device_imp.cpp | 8 +- opencl/source/command_queue/command_queue.cpp | 36 ++++- .../command_queue/command_queue_tests.cpp | 153 ++++++++++++++---- .../test/unit_test/mocks/mock_command_queue.h | 1 + shared/source/device/device.cpp | 12 ++ shared/source/device/device.h | 1 + shared/test/common/mocks/mock_device.h | 2 + 7 files changed, 172 insertions(+), 41 deletions(-) diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 78d9401f63..28a1de8a40 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -1416,12 +1416,8 @@ ze_result_t DeviceImp::getCsrForOrdinalAndIndex(NEO::CommandStreamReceiver **csr auto &osContext = (*csr)->getOsContext(); - if (neoDevice->getNumberOfRegularContextsPerEngine() > 1 && !osContext.isRootDevice()) { - if (NEO::EngineHelpers::isCcs(osContext.getEngineType())) { - *csr = neoDevice->getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_CCS).commandStreamReceiver; - } else if (osContext.getEngineType() == aub_stream::EngineType::ENGINE_BCS) { - *csr = neoDevice->getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_BCS).commandStreamReceiver; - } + if (neoDevice->isMultiRegularContextSelectionAllowed(osContext.getEngineType(), osContext.getEngineUsage())) { + *csr = neoDevice->getNextEngineForMultiRegularContextMode(osContext.getEngineType()).commandStreamReceiver; } } else { auto subDeviceOrdinal = ordinal - numEngineGroups; diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 21491954a0..0e4ffce146 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -93,6 +93,9 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr commandQueueProperties = getCmdQueueProperties(properties); flushStamp.reset(new FlushStampTracker(true)); + storeProperties(properties); + processProperties(properties); + if (device) { auto &hwInfo = device->getHardwareInfo(); auto &gfxCoreHelper = device->getGfxCoreHelper(); @@ -122,9 +125,6 @@ CommandQueue::CommandQueue(Context *context, ClDevice *device, const cl_queue_pr device->getDevice().getL0Debugger()->notifyCommandQueueCreated(&device->getDevice()); } } - - storeProperties(properties); - processProperties(properties); } CommandQueue::~CommandQueue() { @@ -181,8 +181,10 @@ void CommandQueue::initializeGpgpu() const { !(getCmdQueueProperties(propertiesVector.data(), CL_QUEUE_PRIORITY_KHR) & static_cast(CL_QUEUE_PRIORITY_LOW_KHR)) && engineRoundRobinAvailable; - if (device->getDevice().getNumberOfRegularContextsPerEngine() > 1) { - this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(aub_stream::EngineType::ENGINE_CCS); + auto defaultEngineType = device->getDefaultEngine().getEngineType(); + + if (device->getDevice().isMultiRegularContextSelectionAllowed(defaultEngineType, EngineUsage::Regular)) { + this->gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(defaultEngineType); } else if (assignEngineRoundRobin) { this->gpgpuEngine = &device->getDevice().getNextEngineForCommandQueue(); } else { @@ -325,7 +327,13 @@ void CommandQueue::constructBcsEngine(bool internalUsage) { auto bcsEngineType = EngineHelpers::getBcsEngineType(device->getRootDeviceEnvironment(), device->getDeviceBitfield(), selectorCopyEngine, internalUsage); auto bcsIndex = EngineHelpers::getBcsIndex(bcsEngineType); auto engineUsage = (internalUsage && gfxCoreHelper.preferInternalBcsEngine()) ? EngineUsage::Internal : EngineUsage::Regular; - bcsEngines[bcsIndex] = neoDevice.tryGetEngine(bcsEngineType, engineUsage); + + if (neoDevice.isMultiRegularContextSelectionAllowed(bcsEngineType, engineUsage)) { + bcsEngines[bcsIndex] = &neoDevice.getNextEngineForMultiRegularContextMode(bcsEngineType); + } else { + bcsEngines[bcsIndex] = neoDevice.tryGetEngine(bcsEngineType, engineUsage); + } + bcsEngineTypes.push_back(bcsEngineType); bcsInitialized = true; if (bcsEngines[bcsIndex]) { @@ -1157,16 +1165,28 @@ void CommandQueue::overrideEngine(aub_stream::EngineType engineType, EngineUsage const EngineGroupType engineGroupType = gfxCoreHelper.getEngineGroupType(engineType, engineUsage, hwInfo); const bool isEngineCopyOnly = EngineHelper::isCopyOnlyEngineType(engineGroupType); + bool multiRegularContextAllowed = device->getDevice().isMultiRegularContextSelectionAllowed(engineType, engineUsage); + if (isEngineCopyOnly) { std::fill(bcsEngines.begin(), bcsEngines.end(), nullptr); - bcsEngines[EngineHelpers::getBcsIndex(engineType)] = &device->getEngine(engineType, EngineUsage::Regular); + auto engineIndex = EngineHelpers::getBcsIndex(engineType); + + if (multiRegularContextAllowed) { + bcsEngines[engineIndex] = &device->getDevice().getNextEngineForMultiRegularContextMode(engineType); + } else { + bcsEngines[engineIndex] = &device->getEngine(engineType, EngineUsage::Regular); + } bcsEngineTypes = {engineType}; timestampPacketContainer = std::make_unique(); deferredTimestampPackets = std::make_unique(); isCopyOnly = true; bcsInitialized = true; } else { - gpgpuEngine = &device->getEngine(engineType, engineUsage); + if (multiRegularContextAllowed) { + gpgpuEngine = &device->getDevice().getNextEngineForMultiRegularContextMode(engineType); + } else { + gpgpuEngine = &device->getEngine(engineType, engineUsage); + } } } diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index c96af955aa..4826d233c4 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -2763,48 +2763,147 @@ HWTEST_F(CommandQueueOnSpecificEngineTests, givenNotInitializedCcsOsContextWhenC EXPECT_TRUE(osContext.isInitialized()); } -HWTEST_F(CommandQueueOnSpecificEngineTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularContext) { - DebugManagerStateRestore restore{}; - DebugManager.flags.NumberOfRegularContextsPerEngine.set(4); - DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::ENGINE_CCS)); +struct CommandQueueCreateWithMultipleRegularContextsTests : public CommandQueueOnSpecificEngineTests { + void SetUp() override { + DebugManager.flags.NumberOfRegularContextsPerEngine.set(numberOfRegularContextsPerEngine); + DebugManager.flags.EnableMultipleRegularContextForBcs.set(1); + DebugManager.flags.NodeOrdinal.set(static_cast(aub_stream::EngineType::ENGINE_CCS)); - MockExecutionEnvironment mockExecutionEnvironment{}; + backupHwInfo = std::make_unique>(defaultHwInfo.get()); + defaultHwInfo->capabilityTable.blitterOperationsSupported = true; + defaultHwInfo->featureTable.flags.ftrCCSNode = true; - class MyMockGfxCoreHelper : public GfxCoreHelperHw { - public: - const EngineInstancesContainer getGpgpuEngineInstances(const RootDeviceEnvironment &rootDeviceEnvironment) const override { - EngineInstancesContainer result{}; + CommandQueueOnSpecificEngineTests::SetUp(); - result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); - result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); - result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Regular}); - result.push_back({aub_stream::ENGINE_CCS, EngineUsage::Internal}); + context = std::make_unique(); - return result; + uint32_t regularCcsCount = 0; + uint32_t regularBcsCount = 0; + + device = static_cast(&context->getDevice(0)->getDevice()); + + for (auto &engine : device->getAllEngines()) { + if (engine.getEngineUsage() == EngineUsage::Regular) { + if (engine.getEngineType() == aub_stream::EngineType::ENGINE_CCS) { + regularCcsCount++; + } else if (engine.getEngineType() == aub_stream::EngineType::ENGINE_BCS) { + regularBcsCount++; + } + } } - EngineGroupType getEngineGroupType(aub_stream::EngineType engineType, EngineUsage engineUsage, const HardwareInfo &hwInfo) const override { - return EngineGroupType::Compute; + if (regularCcsCount < numberOfRegularContextsPerEngine - 1 || regularBcsCount < numberOfRegularContextsPerEngine - 1) { + GTEST_SKIP(); } - }; - auto raiiGfxCoreHelper = overrideGfxCoreHelper(*mockExecutionEnvironment.rootDeviceEnvironments[0]); + device->regularContextPerBcsEngineAssignmentHelper = 0; - MockContext context{}; - auto &device = static_cast(context.getDevice(0)->getDevice()); - EXPECT_EQ(0u, device.defaultEngineIndex); + auto &engineGroups = device->getAllEngines(); - uint32_t expectedIndex = 0; + for (uint32_t i = 0; i < engineGroups.size(); i++) { + if (engineGroups[i].getEngineType() == aub_stream::EngineType::ENGINE_CCS && !computeOrdinalSet) { + computeOrdinal = i; + computeOrdinalSet = true; + } else if (engineGroups[i].getEngineType() == aub_stream::EngineType::ENGINE_BCS && !copyOrdinalSet) { + copyOrdinal = i; + copyOrdinalSet = true; + } + } + } - for (uint32_t i = 0; i < 8; i++) { - MockCommandQueueHw queue(&context, context.getDevice(0), nullptr); + std::unique_ptr> backupHwInfo; + DebugManagerStateRestore restore; + std::unique_ptr context; + MockDevice *device = nullptr; + const uint32_t numberOfRegularContextsPerEngine = 5; + uint32_t computeOrdinal = 0; + uint32_t copyOrdinal = 0; + bool computeOrdinalSet = false; + bool copyOrdinalSet = false; +}; + +HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularCcsContext) { + constexpr uint32_t iterationCount = 3; + + uint32_t expectedIndex = computeOrdinal; + + // Default queue + for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) { + MockCommandQueueHw queue(context.get(), context->getDevice(0), nullptr); queue.initializeGpgpu(); - EXPECT_EQ(queue.gpgpuEngine, &device.allEngines[expectedIndex]); + EXPECT_EQ(queue.gpgpuEngine, &device->allEngines[expectedIndex]); expectedIndex++; - if (expectedIndex == 3) { - expectedIndex = 0; + if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + computeOrdinal) { + expectedIndex = computeOrdinal; + } + } + + expectedIndex = computeOrdinal; + device->regularContextPerCcsEngineAssignmentHelper = 0; + + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Compute), + CL_QUEUE_INDEX_INTEL, + 0, + 0, + }; + + // Explicit selection + for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) { + MockCommandQueueHw queue(context.get(), context->getDevice(0), queueProperties); + + EXPECT_EQ(queue.gpgpuEngine, &device->allEngines[expectedIndex]); + + expectedIndex++; + if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + computeOrdinal) { + expectedIndex = computeOrdinal; + } + } +} + +HWTEST_F(CommandQueueCreateWithMultipleRegularContextsTests, givenDebugFlagSetWhenCreatingCmdQueueThenAssignNextRegularBcsContext) { + DebugManager.flags.NodeOrdinal.set(-1); + + constexpr uint32_t iterationCount = 3; + + uint32_t expectedIndex = copyOrdinal; + + // Default queue + for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) { + MockCommandQueueHw queue(context.get(), context->getDevice(0), nullptr); + + EXPECT_EQ(queue.bcsEngines[0], &device->allEngines[expectedIndex]); + + expectedIndex++; + if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + copyOrdinal) { + expectedIndex = copyOrdinal; + } + } + + expectedIndex = copyOrdinal; + device->regularContextPerBcsEngineAssignmentHelper = 0; + + // Explicit selection + + cl_queue_properties queueProperties[] = { + CL_QUEUE_FAMILY_INTEL, + device->getEngineGroupIndexFromEngineGroupType(EngineGroupType::Copy), + CL_QUEUE_INDEX_INTEL, + 0, + 0, + }; + + for (uint32_t i = 0; i < (numberOfRegularContextsPerEngine * iterationCount); i++) { + MockCommandQueueHw queue(context.get(), context->getDevice(0), queueProperties); + + EXPECT_EQ(queue.bcsEngines[0], &device->allEngines[expectedIndex]); + + expectedIndex++; + if (expectedIndex == (numberOfRegularContextsPerEngine - 1) + copyOrdinal) { + expectedIndex = copyOrdinal; } } } diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index 6a501e2546..9f103a4df9 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -249,6 +249,7 @@ class MockCommandQueueHw : public CommandQueueHw { using BaseClass::latestSentEnqueueType; using BaseClass::obtainCommandStream; using BaseClass::obtainNewTimestampPacketNodes; + using BaseClass::overrideEngine; using BaseClass::processDispatchForKernels; using BaseClass::requiresCacheFlushAfterWalker; using BaseClass::throttle; diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index 26fc7c2888..6f7467cf62 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -953,4 +953,16 @@ EngineControl &Device::getNextEngineForMultiRegularContextMode(aub_stream::Engin return allEngines[indexToAssign]; } + +bool Device::isMultiRegularContextSelectionAllowed(aub_stream::EngineType engineType, EngineUsage engineUsage) const { + if (this->numberOfRegularContextsPerEngine <= 1 || getNumGenericSubDevices() > 1 || engineUsage != EngineUsage::Regular) { + return false; + } + + if (engineType == aub_stream::EngineType::ENGINE_BCS && DebugManager.flags.EnableMultipleRegularContextForBcs.get() == 1) { + return true; + } + + return EngineHelpers::isCcs(engineType); +} } // namespace NEO diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 8b0aba56be..77f8652515 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -156,6 +156,7 @@ class Device : public ReferenceTrackedObject { const ProductHelper &getProductHelper() const; const CompilerProductHelper &getCompilerProductHelper() const; uint32_t getNumberOfRegularContextsPerEngine() const { return numberOfRegularContextsPerEngine; } + bool isMultiRegularContextSelectionAllowed(aub_stream::EngineType engineType, EngineUsage engineUsage) const; std::atomic debugExecutionCounter = 0; diff --git a/shared/test/common/mocks/mock_device.h b/shared/test/common/mocks/mock_device.h index 37dfc47248..a2c1972a83 100644 --- a/shared/test/common/mocks/mock_device.h +++ b/shared/test/common/mocks/mock_device.h @@ -62,6 +62,8 @@ class MockDevice : public RootDevice { using Device::getGlobalMemorySize; using Device::initializeCaps; using Device::isDebuggerActive; + using Device::regularContextPerBcsEngineAssignmentHelper; + using Device::regularContextPerCcsEngineAssignmentHelper; using Device::regularEngineGroups; using Device::rootCsrCreated; using Device::rtMemoryBackedBuffer;