From 6e35d055f2bb35e36e357a0b80c7c856310a5062 Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Fri, 31 Jan 2025 15:26:12 +0000 Subject: [PATCH] feature: make contextGroupSize dependent on number of processes Related-To: NEO-12952 Signed-off-by: Mateusz Hoppe --- shared/source/device/device.cpp | 21 +++-- .../execution_environment.cpp | 11 ++- shared/source/os_interface/linux/drm_neo.cpp | 4 + shared/source/os_interface/linux/drm_neo.h | 1 + .../source/os_interface/linux/ioctl_helper.h | 1 + .../os_interface/linux/os_context_linux.cpp | 2 +- .../os_interface/linux/os_interface_linux.cpp | 7 ++ .../product_helper_uuid_xehp_and_later.inl | 6 +- shared/source/os_interface/os_interface.h | 2 + .../os_interface/windows/os_context_win.cpp | 4 +- .../os_interface/windows/os_interface_win.cpp | 4 + shared/test/common/libult/linux/drm_mock.h | 5 ++ shared/test/common/mocks/mock_os_interface.h | 22 +++++ .../unit_test/device/neo_device_tests.cpp | 88 +++++++++++++++++++ .../linux/os_interface_linux_tests.cpp | 14 ++- .../windows/os_interface_win_tests.cpp | 5 ++ 16 files changed, 182 insertions(+), 15 deletions(-) create mode 100644 shared/test/common/mocks/mock_os_interface.h diff --git a/shared/source/device/device.cpp b/shared/source/device/device.cpp index a3ad7e1096..b3c3609caa 100644 --- a/shared/source/device/device.cpp +++ b/shared/source/device/device.cpp @@ -326,15 +326,22 @@ bool Device::createEngines() { highPriorityContextCount = static_cast(debugManager.flags.OverrideNumHighPriorityContexts.get()); } - if (engineGroupType == EngineGroupType::compute && hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled > 1) { - contextCount = contextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; - highPriorityContextCount = highPriorityContextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; - } + if (getRootDeviceEnvironment().osInterface && getRootDeviceEnvironment().osInterface->getAggregatedProcessCount() > 1) { + const auto numProcesses = 
getRootDeviceEnvironment().osInterface->getAggregatedProcessCount(); - if (engineGroupType == EngineGroupType::copy || engineGroupType == EngineGroupType::linkedCopy) { - gfxCoreHelper.adjustCopyEngineRegularContextCount(engineGroup->engines.size(), contextCount); - } + contextCount = std::max(contextCount / numProcesses, 2u); + highPriorityContextCount = std::max(contextCount / 2, 1u); + } else { + if (engineGroupType == EngineGroupType::compute && hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled > 1) { + contextCount = contextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; + highPriorityContextCount = highPriorityContextCount / hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled; + } + + if (engineGroupType == EngineGroupType::copy || engineGroupType == EngineGroupType::linkedCopy) { + gfxCoreHelper.adjustCopyEngineRegularContextCount(engineGroup->engines.size(), contextCount); + } + } for (uint32_t engineIndex = 0; engineIndex < static_cast(engineGroup->engines.size()); engineIndex++) { auto engineType = engineGroup->engines[engineIndex].getEngineType(); diff --git a/shared/source/execution_environment/execution_environment.cpp b/shared/source/execution_environment/execution_environment.cpp index 405d5b11f7..8b145cd5b2 100644 --- a/shared/source/execution_environment/execution_environment.cpp +++ b/shared/source/execution_environment/execution_environment.cpp @@ -116,11 +116,16 @@ void ExecutionEnvironment::calculateMaxOsContextCount() { uint32_t numRootContexts = hasRootCsr ? 
1 : 0; uint32_t numSecondaryContexts = 0; if (gfxCoreHelper.areSecondaryContextsSupported()) { - numSecondaryContexts += numRegularEngines * gfxCoreHelper.getContextGroupContextsCount(); - numSecondaryContexts += numHpEngines * gfxCoreHelper.getContextGroupContextsCount(); + + auto groupCount = gfxCoreHelper.getContextGroupContextsCount(); + if (rootDeviceEnvironment->osInterface && rootDeviceEnvironment->osInterface->getAggregatedProcessCount() > 1) { + groupCount = std::max(groupCount / rootDeviceEnvironment->osInterface->getAggregatedProcessCount(), 2u); /* floor of 2 contexts per group, same rule as Device::createEngines in this patch; std::min would cap the budget at 2 */ + } + numSecondaryContexts += numRegularEngines * groupCount; + numSecondaryContexts += numHpEngines * groupCount; osContextCount -= (numRegularEngines + numHpEngines); - numRootContexts *= gfxCoreHelper.getContextGroupContextsCount(); + numRootContexts *= groupCount; } MemoryManager::maxOsContextCount += (numSecondaryContexts + osContextCount) * subDevicesCount + numRootContexts; diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 81d39c95bd..f8827d23e9 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -1766,6 +1766,10 @@ bool Drm::queryDeviceIdAndRevision() { return IoctlHelperI915::queryDeviceIdAndRevision(*this); } +uint32_t Drm::getAggregatedProcessCount() const { + return ioctlHelper->getNumProcesses(); +} + template std::vector Drm::query(uint32_t queryId, uint32_t queryItemFlags); template std::vector Drm::query(uint32_t queryId, uint32_t queryItemFlags); template std::vector Drm::query(uint32_t queryId, uint32_t queryItemFlags); diff --git a/shared/source/os_interface/linux/drm_neo.h b/shared/source/os_interface/linux/drm_neo.h index 4bfac6cb51..29c6334709 100644 --- a/shared/source/os_interface/linux/drm_neo.h +++ b/shared/source/os_interface/linux/drm_neo.h @@ -268,6 +268,7 @@ class Drm : public DriverModel { template std::vector query(uint32_t queryId, uint32_t queryItemFlags);
static std::string getDrmVersion(int fileDescriptor); + MOCKABLE_VIRTUAL uint32_t getAggregatedProcessCount() const; protected: Drm(std::unique_ptr &&hwDeviceIdIn, RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/os_interface/linux/ioctl_helper.h b/shared/source/os_interface/linux/ioctl_helper.h index 8cfa7fb4ff..a1c106a2aa 100644 --- a/shared/source/os_interface/linux/ioctl_helper.h +++ b/shared/source/os_interface/linux/ioctl_helper.h @@ -235,6 +235,7 @@ class IoctlHelper { virtual bool queryDeviceParams(uint32_t *moduleId, uint16_t *serverType) { return false; } virtual bool isTimestampsRefreshEnabled() { return false; } + virtual uint32_t getNumProcesses() { return 1; } virtual bool makeResidentBeforeLockNeeded() const { return false; } diff --git a/shared/source/os_interface/linux/os_context_linux.cpp b/shared/source/os_interface/linux/os_context_linux.cpp index ac15ce512e..e1a158c4ea 100644 --- a/shared/source/os_interface/linux/os_context_linux.cpp +++ b/shared/source/os_interface/linux/os_context_linux.cpp @@ -23,7 +23,7 @@ namespace NEO { OsContext *OsContextLinux::create(OSInterface *osInterface, uint32_t rootDeviceIndex, uint32_t contextId, const EngineDescriptor &engineDescriptor) { - if (osInterface) { + if (osInterface && osInterface->getDriverModel() && osInterface->getDriverModel()->getDriverModelType() == DriverModelType::drm) { /* no driver model attached: use the generic OsContext below */ return new OsContextLinux(*osInterface->getDriverModel()->as(), rootDeviceIndex, contextId, engineDescriptor); } return new OsContext(rootDeviceIndex, contextId, engineDescriptor); diff --git a/shared/source/os_interface/linux/os_interface_linux.cpp b/shared/source/os_interface/linux/os_interface_linux.cpp index 230bc84fce..e48ab2fa33 100644 --- a/shared/source/os_interface/linux/os_interface_linux.cpp +++ b/shared/source/os_interface/linux/os_interface_linux.cpp @@ -58,4 +58,11 @@ bool initDrmOsInterface(std::unique_ptr &&hwDeviceId, uint32_t rootD return true; } +uint32_t OSInterface::getAggregatedProcessCount() const { + if
(driverModel && driverModel->getDriverModelType() == DriverModelType::drm) { + return driverModel->as()->getAggregatedProcessCount(); + } + return 0; +} + } // namespace NEO diff --git a/shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl b/shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl index 99d7edf2e9..d2187052db 100644 --- a/shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl +++ b/shared/source/os_interface/linux/product_helper_uuid_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -12,6 +12,10 @@ static inline bool uuidReadFromTelem(std::string_view telemDir, std::array bool ProductHelperHw::getUuid(DriverModel *driverModel, const uint32_t subDeviceCount, const uint32_t deviceIndex, std::array &uuid) const { + if (driverModel->getDriverModelType() != DriverModelType::drm) { + return false; + } + auto pDrm = driverModel->as(); std::optional rootPciPath = getPciRootPath(pDrm->getFileDescriptor()); if (!rootPciPath.has_value()) { diff --git a/shared/source/os_interface/os_interface.h b/shared/source/os_interface/os_interface.h index 8a21919597..69fde85c52 100644 --- a/shared/source/os_interface/os_interface.h +++ b/shared/source/os_interface/os_interface.h @@ -117,6 +117,8 @@ class OSInterface : public NonCopyableClass { MOCKABLE_VIRTUAL bool isDebugAttachAvailable() const; MOCKABLE_VIRTUAL bool isLockablePointer(bool isLockable) const; + MOCKABLE_VIRTUAL uint32_t getAggregatedProcessCount() const; + static bool osEnabled64kbPages; static bool osEnableLocalMemory; static bool are64kbPagesEnabled(); diff --git a/shared/source/os_interface/windows/os_context_win.cpp b/shared/source/os_interface/windows/os_context_win.cpp index be58dceb40..b4ab8588d2 100644 --- a/shared/source/os_interface/windows/os_context_win.cpp +++ b/shared/source/os_interface/windows/os_context_win.cpp @@ -1,5 
+1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,7 +18,7 @@ namespace NEO { OsContext *OsContextWin::create(OSInterface *osInterface, uint32_t rootDeviceIndex, uint32_t contextId, const EngineDescriptor &engineDescriptor) { - if (osInterface) { + if (osInterface && osInterface->getDriverModel() && osInterface->getDriverModel()->getDriverModelType() == DriverModelType::wddm) { return new OsContextWin(*osInterface->getDriverModel()->as(), rootDeviceIndex, contextId, engineDescriptor); } return new OsContext(rootDeviceIndex, contextId, engineDescriptor); diff --git a/shared/source/os_interface/windows/os_interface_win.cpp b/shared/source/os_interface/windows/os_interface_win.cpp index e91b3fe402..8e794ae829 100644 --- a/shared/source/os_interface/windows/os_interface_win.cpp +++ b/shared/source/os_interface/windows/os_interface_win.cpp @@ -26,4 +26,8 @@ bool OSInterface::isLockablePointer(bool isLockable) const { return isLockable; } +uint32_t OSInterface::getAggregatedProcessCount() const { + return 0; +} + } // namespace NEO diff --git a/shared/test/common/libult/linux/drm_mock.h b/shared/test/common/libult/linux/drm_mock.h index 16135c392b..2a669d7ebf 100644 --- a/shared/test/common/libult/linux/drm_mock.h +++ b/shared/test/common/libult/linux/drm_mock.h @@ -186,6 +186,10 @@ class DrmMock : public Drm { return storedRetValForGetGttSize; } + uint32_t getAggregatedProcessCount() const override { + return mockProcessCount; + } + static const int mockFd = 33; bool failRetTopology = false; @@ -215,6 +219,7 @@ class DrmMock : public Drm { int storedRetValForVmId = 1; int storedCsTimestampFrequency = 1000; int storedOaTimestampFrequency = 123456; + uint32_t mockProcessCount = 1; bool disableSomeTopology = false; bool allowDebugAttach = false; bool allowDebugAttachCallBase = false; diff --git a/shared/test/common/mocks/mock_os_interface.h b/shared/test/common/mocks/mock_os_interface.h new file mode 100644 index
0000000000..e8701ee47b --- /dev/null +++ b/shared/test/common/mocks/mock_os_interface.h @@ -0,0 +1,22 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +#include "shared/source/os_interface/os_interface.h" + +namespace NEO { + +struct MockOsInterface : public OSInterface { + MockOsInterface() = default; + uint32_t getAggregatedProcessCount() const override { + return numberOfProcesses; + } + + uint32_t numberOfProcesses = 1; +}; +} // namespace NEO diff --git a/shared/test/unit_test/device/neo_device_tests.cpp b/shared/test/unit_test/device/neo_device_tests.cpp index 5904c765b3..4d2b271ccc 100644 --- a/shared/test/unit_test/device/neo_device_tests.cpp +++ b/shared/test/unit_test/device/neo_device_tests.cpp @@ -34,6 +34,7 @@ #include "shared/test/common/mocks/mock_driver_model.h" #include "shared/test/common/mocks/mock_io_functions.h" #include "shared/test/common/mocks/mock_memory_manager.h" +#include "shared/test/common/mocks/mock_os_interface.h" #include "shared/test/common/mocks/mock_product_helper.h" #include "shared/test/common/mocks/mock_release_helper.h" #include "shared/test/common/mocks/ult_device_factory.h" @@ -1567,6 +1568,93 @@ HWTEST_F(DeviceTests, givenContextGroupSizeEnabledWhenMoreHpEnginesCreatedThenFr } } +HWTEST_F(DeviceTests, givenDeviceWithCCSEngineAndAggregatedProcessesWhenDeviceIsCreatedThenNumberOfSecondaryEnginesIsLimited) { + if (defaultHwInfo->capabilityTable.defaultEngineType != aub_stream::EngineType::ENGINE_CCS) { + GTEST_SKIP(); + } + DebugManagerStateRestore dbgRestorer; + + HardwareInfo hwInfo = *defaultHwInfo; + hwInfo.featureTable.flags.ftrCCSNode = true; + hwInfo.featureTable.ftrBcsInfo = 0; + hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_CCS; + hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 2; + + const auto numProcesses = 4u; + auto executionEnvironment = new MockExecutionEnvironment(&hwInfo, false, 1); + executionEnvironment->incRefInternal(); + 
auto osInterface = new MockOsInterface(); + auto driverModelMock = std::make_unique(); + osInterface->setDriverModel(std::move(driverModelMock)); + + executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(osInterface); + osInterface->numberOfProcesses = numProcesses; + + uint32_t testedContextGroupSizes[] = {10, 22}; + uint32_t expectedRegularCounts[] = {1u, 3u}; + uint32_t expectedHpCounts[] = {1u, 2u}; + + for (uint32_t contextGroupSizeIndex = 0; contextGroupSizeIndex < 2; contextGroupSizeIndex++) { + debugManager.flags.ContextGroupSize.set(testedContextGroupSizes[contextGroupSizeIndex]); + + auto device = std::unique_ptr(MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, 0)); + auto &engineGroups = device->getRegularEngineGroups(); + + auto engineGroupType = EngineGroupType::compute; + size_t computeEnginesCount = 0; + for (const auto &engine : engineGroups) { + if (engine.engineGroupType == engineGroupType) { + computeEnginesCount = engine.engines.size(); + } + } + + ASSERT_EQ(computeEnginesCount, device->secondaryEngines.size()); + ASSERT_EQ(testedContextGroupSizes[contextGroupSizeIndex] / numProcesses, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size()); + + auto defaultEngine = device->getDefaultEngine(); + EXPECT_EQ(defaultEngine.commandStreamReceiver, device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines[0].commandStreamReceiver); + + const uint32_t regularContextCount = expectedRegularCounts[contextGroupSizeIndex]; + const uint32_t hpContextCount = expectedHpCounts[contextGroupSizeIndex]; + + for (uint32_t ccsIndex = 0; ccsIndex < computeEnginesCount; ccsIndex++) { + auto &secondaryEngines = device->secondaryEngines[EngineHelpers::mapCcsIndexToEngineType(ccsIndex)]; + + EXPECT_TRUE(secondaryEngines.engines[0].osContext->isPartOfContextGroup()); + EXPECT_EQ(nullptr, secondaryEngines.engines[0].osContext->getPrimaryContext()); + + for (size_t i = 1; i < 
device->secondaryEngines[aub_stream::EngineType::ENGINE_CCS].engines.size(); i++) { + EXPECT_EQ(secondaryEngines.engines[0].osContext, secondaryEngines.engines[i].osContext->getPrimaryContext()); + EXPECT_TRUE(secondaryEngines.engines[i].osContext->isPartOfContextGroup()); + } + + EXPECT_EQ(regularContextCount, secondaryEngines.regularEnginesTotal); + EXPECT_EQ(hpContextCount, secondaryEngines.highPriorityEnginesTotal); + + for (size_t contextId = 0; contextId < regularContextCount + 1; contextId++) { + auto engine = device->getSecondaryEngineCsr({EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::regular}, false); + ASSERT_NE(nullptr, engine); + + if (contextId == regularContextCount) { + EXPECT_EQ(&secondaryEngines.engines[0], engine); + } + } + + for (size_t contextId = 0; contextId < hpContextCount; contextId++) { + auto engine = device->getSecondaryEngineCsr({EngineHelpers::mapCcsIndexToEngineType(ccsIndex), EngineUsage::highPriority}, false); + ASSERT_NE(nullptr, engine); + + EXPECT_EQ(contextId + 1, secondaryEngines.highPriorityCounter.load()); + if (contextId == testedContextGroupSizes[contextGroupSizeIndex] - regularContextCount) { + EXPECT_EQ(&secondaryEngines.engines[regularContextCount], engine); + } + } + } + executionEnvironment->memoryManager->reInitLatestContextId(); + } + executionEnvironment->decRefInternal(); +} + HWTEST_F(DeviceTests, givenDebugFlagSetWhenCreatingSecondaryEnginesThenCreateCorrectNumberOfHighPriorityContexts) { DebugManagerStateRestore dbgRestorer; constexpr uint32_t contextGroupSize = 16; diff --git a/shared/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp b/shared/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp index 46347834c2..050acca370 100644 --- a/shared/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/os_interface_linux_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 
Intel Corporation * * SPDX-License-Identifier: MIT * @@ -55,6 +55,18 @@ TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenCallingIsDebugAttachAvailableThen EXPECT_FALSE(osInterface.isDebugAttachAvailable()); } +TEST(OsInterfaceTest, GivenLinuxOsInterfaceWhenCallingGetAggregatedProcessCountThenCallRedirectedToDriverModel) { + OSInterface osInterface; + EXPECT_EQ(0u, osInterface.getAggregatedProcessCount()); + + auto executionEnvironment = std::make_unique(); + + DrmMock *drm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]); + osInterface.setDriverModel(std::unique_ptr(drm)); + drm->mockProcessCount = 5; + EXPECT_EQ(5u, osInterface.getAggregatedProcessCount()); +} + TEST(OsInterfaceTest, whenOsInterfaceSetupGmmInputArgsThenArgsAreSet) { MockExecutionEnvironment executionEnvironment{}; auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0]; diff --git a/shared/test/unit_test/os_interface/windows/os_interface_win_tests.cpp b/shared/test/unit_test/os_interface/windows/os_interface_win_tests.cpp index 46573f7cb3..b6f6c6b755 100644 --- a/shared/test/unit_test/os_interface/windows/os_interface_win_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/os_interface_win_tests.cpp @@ -62,6 +62,11 @@ TEST_F(OsInterfaceTest, GivenDefaultOsInterfaceThenLocalMemoryEnabled) { EXPECT_TRUE(OSInterface::osEnableLocalMemory); } +TEST(OsInterfaceSimpleTest, GivenOsInterfaceWhenCallingGetAggregatedProcessCountThenCallReturnsZero) { + OSInterface osInterface; + EXPECT_EQ(0u, osInterface.getAggregatedProcessCount()); +} + TEST_F(OsInterfaceTest, whenOsInterfaceSetupGmmInputArgsThenArgsAreSet) { MockExecutionEnvironment executionEnvironment; auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];