fix: Check if provided CCS number is correct

Check whether the `ZEX_NUMBER_OF_CCS` environment variable provided by the
user is valid. If it is not, print a debug message and return false.

Related-To: NEO-15230, GSD-11251
Signed-off-by: Kindracki, Jakub Tomasz <jakub.tomasz.kindracki@intel.com>
This commit is contained in:
Kindracki, Jakub Tomasz
2025-10-30 14:13:12 +00:00
committed by Compute-Runtime-Automation
parent b4983f234d
commit 94be8023dc
13 changed files with 175 additions and 27 deletions

View File

@@ -354,8 +354,10 @@ void ExecutionEnvironment::adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceE
productHelper.adjustNumberOfCcs(*hwInfo);
}
void ExecutionEnvironment::adjustCcsCount() {
parseCcsCountLimitations();
bool ExecutionEnvironment::adjustCcsCount() {
if (!parseCcsCountLimitations()) {
return false;
}
for (auto rootDeviceIndex = 0u; rootDeviceIndex < rootDeviceEnvironments.size(); rootDeviceIndex++) {
auto &rootDeviceEnvironment = rootDeviceEnvironments[rootDeviceIndex];
@@ -364,32 +366,42 @@ void ExecutionEnvironment::adjustCcsCount() {
adjustCcsCountImpl(rootDeviceEnvironment.get());
}
}
return true;
}
// Adjusts the CCS count of the single root device selected by rootDeviceIndex.
// If rootDeviceNumCcsMap holds an entry for that index, the stored count is passed to
// RootDeviceEnvironment::setNumberOfCcs() and false is returned when it is rejected;
// otherwise adjustCcsCountImpl() is applied. Returns true when no rejection occurred.
// NOTE(review): this listing interleaves removed and added diff lines; where a line is
// immediately followed by a near-identical one, the comments describe the bool-returning version.
void ExecutionEnvironment::adjustCcsCount(const uint32_t rootDeviceIndex) const {
bool ExecutionEnvironment::adjustCcsCount(const uint32_t rootDeviceIndex) const {
auto &rootDeviceEnvironment = rootDeviceEnvironments[rootDeviceIndex];
UNRECOVERABLE_IF(!rootDeviceEnvironment);
if (rootDeviceNumCcsMap.find(rootDeviceIndex) != rootDeviceNumCcsMap.end()) {
rootDeviceEnvironment->setNumberOfCcs(rootDeviceNumCcsMap.at(rootDeviceIndex));
// Propagate a rejected CCS count to the caller instead of ignoring it.
if (!rootDeviceEnvironment->setNumberOfCcs(rootDeviceNumCcsMap.at(rootDeviceIndex))) {
return false;
}
} else {
// No explicit count recorded for this device: use the default adjustment path.
adjustCcsCountImpl(rootDeviceEnvironment.get());
}
return true;
}
// Reads the ZEX_NUMBER_OF_CCS debug flag and hands it to each root device's
// ProductHelper::parseCcsMode(), which fills rootDeviceNumCcsMap.
// Returns true when the flag is empty or "default" (nothing to parse) or when every
// root device parsed it successfully; returns false at the first device that fails.
// NOTE(review): this listing interleaves removed and added diff lines; where a line is
// immediately followed by a near-identical one, the comments describe the bool-returning version.
void ExecutionEnvironment::parseCcsCountLimitations() {
bool ExecutionEnvironment::parseCcsCountLimitations() {
const auto &numberOfCcsString = debugManager.flags.ZEX_NUMBER_OF_CCS.get();
// An empty or "default" value means no limitation was requested.
if (numberOfCcsString.compare("default") == 0 ||
numberOfCcsString.empty()) {
return;
return true;
}
for (auto rootDeviceIndex = 0u; rootDeviceIndex < rootDeviceEnvironments.size(); rootDeviceIndex++) {
auto &rootDeviceEnvironment = rootDeviceEnvironments[rootDeviceIndex];
UNRECOVERABLE_IF(!rootDeviceEnvironment);
auto &productHelper = rootDeviceEnvironment->getHelper<ProductHelper>();
productHelper.parseCcsMode(numberOfCcsString, rootDeviceNumCcsMap, rootDeviceIndex, rootDeviceEnvironment.get());
// Stop at the first device for which the flag value cannot be applied.
if (!productHelper.parseCcsMode(numberOfCcsString, rootDeviceNumCcsMap, rootDeviceIndex, rootDeviceEnvironment.get())) {
return false;
}
}
return true;
}
void ExecutionEnvironment::configureNeoEnvironment() {

View File

@@ -37,8 +37,8 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
virtual void prepareRootDeviceEnvironments(uint32_t numRootDevices);
void prepareRootDeviceEnvironment(const uint32_t rootDeviceIndexForReInit);
void parseAffinityMask();
void adjustCcsCount();
void adjustCcsCount(const uint32_t rootDeviceIndex) const;
bool adjustCcsCount();
bool adjustCcsCount(const uint32_t rootDeviceIndex) const;
void sortNeoDevices();
void setDeviceHierarchyMode(const GfxCoreHelper &gfxCoreHelper);
void setDeviceHierarchyMode(const DeviceHierarchyMode deviceHierarchyMode) {
@@ -47,7 +47,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
DeviceHierarchyMode getDeviceHierarchyMode() const { return deviceHierarchyMode; }
void adjustRootDeviceEnvironments();
void prepareForCleanup() const;
void configureCcsMode();
MOCKABLE_VIRTUAL void configureCcsMode();
void setDebuggingMode(DebuggingMode debuggingMode) {
debuggingEnabledMode = debuggingMode;
}
@@ -97,7 +97,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject<ExecutionEnvironment>
protected:
static bool comparePciIdBusNumber(std::unique_ptr<RootDeviceEnvironment> &rootDeviceEnvironment1, std::unique_ptr<RootDeviceEnvironment> &rootDeviceEnvironment2);
void parseCcsCountLimitations();
bool parseCcsCountLimitations();
void adjustCcsCountImpl(RootDeviceEnvironment *rootDeviceEnvironment) const;
void configureNeoEnvironment();
void restoreCcsMode();

View File

@@ -246,12 +246,19 @@ BuiltIns *RootDeviceEnvironment::getBuiltIns() {
return this->builtins.get();
}
// Sets the number of enabled CCS for this root device.
// A request of 0, or one larger than the currently enabled count, is rejected: a debug
// message is printed to stderr (gated by the PrintDebugMessages flag) and false is returned
// without modifying hwInfo. Otherwise the enabled count is overwritten, limitedNumberOfCcs
// is set, the AUB manager (when aubCenter exists) is updated, and true is returned.
// NOTE(review): this listing interleaves removed and added diff lines; the std::min clamp
// under the void signature is the pre-change behaviour, the comments describe the bool version.
void RootDeviceEnvironment::setNumberOfCcs(uint32_t numberOfCcs) {
hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = std::min(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled, numberOfCcs);
bool RootDeviceEnvironment::setNumberOfCcs(uint32_t numberOfCcs) {
// Validate before touching any state: the count must be non-zero and not exceed what is enabled.
if (hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled < numberOfCcs || numberOfCcs == 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error: Invalid number of CCS: %u. Maximum available number of CCS: %u\n", numberOfCcs, hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
return false;
}
hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled = numberOfCcs;
limitedNumberOfCcs = true;
if (aubCenter) {
aubCenter->getAubManager()->setCCSMode(hwInfo->gtSystemInfo.CCSInfo.NumberOfCCSEnabled);
}
return true;
}
uint32_t RootDeviceEnvironment::getNumberOfCcs() const {

View File

@@ -81,7 +81,7 @@ struct RootDeviceEnvironment : NonCopyableClass {
BindlessHeapsHelper *getBindlessHeapsHelper() const;
AssertHandler *getAssertHandler(Device *neoDevice);
void createBindlessHeapsHelper(Device *rootDevice, bool availableDevices);
void setNumberOfCcs(uint32_t numberOfCcs);
bool setNumberOfCcs(uint32_t numberOfCcs);
uint32_t getNumberOfCcs() const;
bool isNumberOfCcsLimited() const;
void setRcsExposure();

View File

@@ -164,7 +164,9 @@ bool DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(ExecutionE
executionEnvironment.setDeviceHierarchyMode(executionEnvironment.rootDeviceEnvironments[0]->getHelper<GfxCoreHelper>());
executionEnvironment.parseAffinityMask();
executionEnvironment.adjustCcsCount();
if (!executionEnvironment.adjustCcsCount()) {
return false;
}
executionEnvironment.calculateMaxOsContextCount();
return true;
}
@@ -272,7 +274,9 @@ bool DeviceFactory::prepareDeviceEnvironments(ExecutionEnvironment &executionEnv
executionEnvironment.sortNeoDevices();
executionEnvironment.parseAffinityMask();
executionEnvironment.adjustRootDeviceEnvironments();
executionEnvironment.adjustCcsCount();
if (!executionEnvironment.adjustCcsCount()) {
return false;
}
executionEnvironment.calculateMaxOsContextCount();
return true;
@@ -294,7 +298,9 @@ bool DeviceFactory::prepareDeviceEnvironment(ExecutionEnvironment &executionEnvi
return false;
}
executionEnvironment.adjustCcsCount(rootDeviceIndex);
if (!executionEnvironment.adjustCcsCount(rootDeviceIndex)) {
return false;
}
return true;
}

View File

@@ -227,7 +227,7 @@ class ProductHelper {
virtual void fillPipelineSelectPropertiesSupportStructure(PipelineSelectPropertiesSupport &propertiesSupport, const HardwareInfo &hwInfo) const = 0;
virtual void fillStateBaseAddressPropertiesSupportStructure(StateBaseAddressPropertiesSupport &propertiesSupport) const = 0;
virtual void parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const = 0;
virtual bool parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const = 0;
virtual bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount, const HardwareInfo &hwInfo) const = 0;
virtual bool isCalculationForDisablingEuFusionWithDpasNeeded(const HardwareInfo &hwInfo) const = 0;

View File

@@ -715,12 +715,16 @@ void ProductHelperHw<gfxProduct>::fillStateBaseAddressPropertiesSupportStructure
}
// Generic parseCcsMode: converts the whole ccsModeString to a single uint32_t CCS count,
// records it in rootDeviceNumCcsMap under rootDeviceIndex and applies it through
// RootDeviceEnvironment::setNumberOfCcs(). Returns the result of that call.
// NOTE(review): the count is inserted into rootDeviceNumCcsMap before setNumberOfCcs()
// validates it, so a rejected value remains in the map — confirm this is intended.
// NOTE(review): this listing interleaves removed and added diff lines; where a line is
// immediately followed by a near-identical one, the comments describe the bool-returning version.
template <PRODUCT_FAMILY gfxProduct>
void ProductHelperHw<gfxProduct>::parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const {
bool ProductHelperHw<gfxProduct>::parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const {
auto ccsCount = StringHelpers::toUint32t(ccsModeString);
rootDeviceNumCcsMap.insert({rootDeviceIndex, ccsCount});
rootDeviceEnvironment->setNumberOfCcs(ccsCount);
if (!rootDeviceEnvironment->setNumberOfCcs(ccsCount)) {
return false;
}
return true;
}
template <PRODUCT_FAMILY gfxProduct>

View File

@@ -165,7 +165,7 @@ class ProductHelperHw : public ProductHelper {
void fillFrontEndPropertiesSupportStructure(FrontEndPropertiesSupport &propertiesSupport, const HardwareInfo &hwInfo) const override;
void fillPipelineSelectPropertiesSupportStructure(PipelineSelectPropertiesSupport &propertiesSupport, const HardwareInfo &hwInfo) const override;
void fillStateBaseAddressPropertiesSupportStructure(StateBaseAddressPropertiesSupport &propertiesSupport) const override;
void parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const override;
bool parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const override;
bool isFusedEuDisabledForDpas(bool kernelHasDpasInstructions, const uint32_t *lws, const uint32_t *groupCount, const HardwareInfo &hwInfo) const override;
bool isCalculationForDisablingEuFusionWithDpasNeeded(const HardwareInfo &hwInfo) const override;

View File

@@ -153,21 +153,28 @@ bool ProductHelperHw<gfxProduct>::isBlitCopyRequiredForLocalMemory(const RootDev
}
// Specialized parseCcsMode: ccsModeString is split on "," into entries and each entry on ":"
// into a root device index and a CCS count. An entry with fewer than two
// parts is reported as an invalid format (debug message to stderr, gated by PrintDebugMessages)
// and false is returned. For entries whose parsed index equals rootDeviceIndex, the count is
// recorded in rootDeviceNumCcsMap and applied via setNumberOfCcs(); a rejected count returns
// false. Returns true when all entries were processed without error.
// NOTE(review): this listing interleaves removed and added diff lines; the nested
// `subEntries.size() > 1` branch is the pre-change code, the comments describe the bool version.
template <>
void ProductHelperHw<gfxProduct>::parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const {
bool ProductHelperHw<gfxProduct>::parseCcsMode(std::string ccsModeString, std::unordered_map<uint32_t, uint32_t> &rootDeviceNumCcsMap, uint32_t rootDeviceIndex, RootDeviceEnvironment *rootDeviceEnvironment) const {
auto numberOfCcsEntries = StringHelpers::split(ccsModeString, ",");
for (const auto &entry : numberOfCcsEntries) {
auto subEntries = StringHelpers::split(entry, ":");
// Every entry must carry both an index and a count; the check applies to all entries,
// not only the one matching rootDeviceIndex.
if (subEntries.size() < 2) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error: Invalid ZEX_NUMBER_OF_CCS format '%s'\n", ccsModeString.c_str());
return false;
}
uint32_t rootDeviceIndexParsed = StringHelpers::toUint32t(subEntries[0]);
if (rootDeviceIndexParsed == rootDeviceIndex) {
if (subEntries.size() > 1) {
uint32_t maxCcsCount = StringHelpers::toUint32t(subEntries[1]);
rootDeviceNumCcsMap.insert({rootDeviceIndex, maxCcsCount});
rootDeviceEnvironment->setNumberOfCcs(maxCcsCount);
uint32_t maxCcsCount = StringHelpers::toUint32t(subEntries[1]);
rootDeviceNumCcsMap.insert({rootDeviceIndex, maxCcsCount});
if (!rootDeviceEnvironment->setNumberOfCcs(maxCcsCount)) {
return false;
}
}
}
return true;
}
template <>

View File

@@ -91,4 +91,8 @@ void MockExecutionEnvironment::initGmm() {
}
}
// Test helper: records numCcs for rootDeviceIndex directly in rootDeviceNumCcsMap.
// An entry that already exists for that index is left unchanged.
void MockExecutionEnvironment::addToRootDeviceNumCcsMap(uint32_t rootDeviceIndex, uint32_t numCcs) {
    rootDeviceNumCcsMap.emplace(rootDeviceIndex, numCcs);
}
} // namespace NEO

View File

@@ -52,6 +52,7 @@ struct MockExecutionEnvironment : ExecutionEnvironment {
MockExecutionEnvironment(const HardwareInfo *hwInfo);
MockExecutionEnvironment(const HardwareInfo *hwInfo, bool useMockAubCenter, uint32_t numRootDevices);
void initGmm();
void addToRootDeviceNumCcsMap(uint32_t rootDeviceIndex, uint32_t numCcs);
};
} // namespace NEO

View File

@@ -780,6 +780,49 @@ TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenSetDevicePermissionError
EXPECT_FALSE(executionEnvironment.isDevicePermissionError());
}
TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenCcsNumberIsInvalidThenAdjustCcsCountReturnsFalse) {
    // Runs adjustCcsCount() on a fresh environment with ZEX_NUMBER_OF_CCS set to flagValue.
    // Each call uses its own DebugManagerStateRestore and MockExecutionEnvironment.
    auto adjustAllWithFlag = [](const char *flagValue) {
        DebugManagerStateRestore restorer;
        MockExecutionEnvironment executionEnvironment;
        debugManager.flags.ZEX_NUMBER_OF_CCS.set(flagValue);
        return executionEnvironment.adjustCcsCount();
    };

    // Runs adjustCcsCount(0) on a fresh environment whose map already holds numCcs for device 0.
    auto adjustDeviceZeroWithMapEntry = [](uint32_t numCcs) {
        MockExecutionEnvironment executionEnvironment;
        executionEnvironment.addToRootDeviceNumCcsMap(0, numCcs);
        return executionEnvironment.adjustCcsCount(0);
    };

    // Zero and too-large counts supplied through the debug flag, in both plain and "index:count" form.
    EXPECT_FALSE(adjustAllWithFlag("0"));
    EXPECT_FALSE(adjustAllWithFlag("0:0"));
    EXPECT_FALSE(adjustAllWithFlag("100"));
    EXPECT_FALSE(adjustAllWithFlag("0:100"));

    // The same invalid counts supplied through the per-device map.
    EXPECT_FALSE(adjustDeviceZeroWithMapEntry(0u));
    EXPECT_FALSE(adjustDeviceZeroWithMapEntry(100u));
}
void ExecutionEnvironmentSortTests::SetUp() {
executionEnvironment.prepareRootDeviceEnvironments(numRootDevices);
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < numRootDevices; rootDeviceIndex++) {

View File

@@ -8,12 +8,14 @@
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/product_config_helper.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/release_helper/release_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/device_caps_reader_test_helper.h"
#include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/helpers/stream_capture.h"
#include "shared/test/common/mocks/mock_driver_model.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_product_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
@@ -54,6 +56,30 @@ TEST_F(DeviceFactoryTests, givenHwIpVersionOverrideWhenPrepareDeviceEnvironments
EXPECT_NE(0u, executionEnvironment.rootDeviceEnvironments[0]->getHardwareInfo()->platform.usDeviceID);
}
// ZEX_NUMBER_OF_CCS="0" must make the product-family-override path fail, while the
// hardware info of root device 0 is still populated from the IP version override.
TEST_F(DeviceFactoryTests, givenHwIpVersionOverrideWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledWithNumberOfCcsSetToZeroThenFalseIsReturned) {
    ExecutionEnvironment executionEnvironment{};
    const auto expectedIpVersion = defaultHwInfo.get()->ipVersion.value;
    debugManager.flags.OverrideHwIpVersion.set(expectedIpVersion);
    debugManager.flags.ZEX_NUMBER_OF_CCS.set("0");

    EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment));

    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    EXPECT_EQ(expectedIpVersion, rootDeviceEnvironment.getHardwareInfo()->ipVersion.value);
    EXPECT_NE(0u, rootDeviceEnvironment.getHardwareInfo()->platform.usDeviceID);
}
// ZEX_NUMBER_OF_CCS="0:0" (index:count form) must make the product-family-override path fail,
// while the hardware info of root device 0 is still populated from the IP version override.
TEST_F(DeviceFactoryTests, givenHwIpVersionOverrideWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledWithNumberOfCcsSetToZeroColonZeroThenFalseIsReturned) {
    ExecutionEnvironment executionEnvironment{};
    const auto expectedIpVersion = defaultHwInfo.get()->ipVersion.value;
    debugManager.flags.OverrideHwIpVersion.set(expectedIpVersion);
    debugManager.flags.ZEX_NUMBER_OF_CCS.set("0:0");

    EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironmentsForProductFamilyOverride(executionEnvironment));

    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
    EXPECT_EQ(expectedIpVersion, rootDeviceEnvironment.getHardwareInfo()->ipVersion.value);
    EXPECT_NE(0u, rootDeviceEnvironment.getHardwareInfo()->platform.usDeviceID);
}
TEST_F(DeviceFactoryTests, givenHwIpVersionOverrideWhenPrepareDeviceEnvironmentsForProductFamilyOverrideIsCalledThenReleaseHelperContainsCorrectIpVersion) {
ExecutionEnvironment executionEnvironment{};
auto config = defaultHwInfo.get()->ipVersion.value;
@@ -156,6 +182,44 @@ TEST_F(DeviceFactoryTests, givenMultipleDevicesWhenInitializeResourcesSucceedsFo
EXPECT_EQ(2u, rootDeviceEnvironment1->initOsInterfaceCalled);
}
// MockExecutionEnvironment variant whose configureCcsMode() override does nothing.
// Inherits the base constructors and re-exports rootDeviceEnvironments for direct test access.
class MockExecutionEnvironmentConfigureCssMode : public MockExecutionEnvironment {
  public:
    using MockExecutionEnvironment::MockExecutionEnvironment;
    using MockExecutionEnvironment::rootDeviceEnvironments;

    void configureCcsMode() override {}
};
// OS interface initialization is set to succeed, but ZEX_NUMBER_OF_CCS="0" must still make
// prepareDeviceEnvironments() return false.
TEST_F(DeviceFactoryTests, givenDeviceWhenInitializeResourcesSucceedsButCcsNumberIsZeroThenFalseIsReturned) {
    debugManager.flags.CreateMultipleRootDevices.set(1);
    debugManager.flags.ZEX_NUMBER_OF_CCS.set("0");

    MockExecutionEnvironmentConfigureCssMode executionEnvironment(defaultHwInfo.get(), true, 1u);
    auto &rootDeviceEnvironments = executionEnvironment.rootDeviceEnvironments;
    EXPECT_EQ(1u, rootDeviceEnvironments.size());

    auto *mockRootDeviceEnvironment = static_cast<MockRootDeviceEnvironment *>(rootDeviceEnvironments[0].get());
    mockRootDeviceEnvironment->initOsInterfaceResults.push_back(true);

    EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironments(executionEnvironment));
}
// OS interface initialization is set to succeed, but ZEX_NUMBER_OF_CCS="0:0" (index:count form)
// must still make prepareDeviceEnvironments() return false.
TEST_F(DeviceFactoryTests, givenDeviceWhenInitializeResourcesSucceedsButCcsNumberIsZeroColonZeroThenFalseIsReturned) {
    debugManager.flags.CreateMultipleRootDevices.set(1);
    debugManager.flags.ZEX_NUMBER_OF_CCS.set("0:0");

    MockExecutionEnvironmentConfigureCssMode executionEnvironment(defaultHwInfo.get(), true, 1u);
    auto &rootDeviceEnvironments = executionEnvironment.rootDeviceEnvironments;
    EXPECT_EQ(1u, rootDeviceEnvironments.size());

    auto *mockRootDeviceEnvironment = static_cast<MockRootDeviceEnvironment *>(rootDeviceEnvironments[0].get());
    mockRootDeviceEnvironment->initOsInterfaceResults.push_back(true);

    EXPECT_FALSE(DeviceFactory::prepareDeviceEnvironments(executionEnvironment));
}
TEST_F(DeviceFactoryTests, givenMultipleDevicesWhenInitializeResourcesFailsForAllDevicesThenFailureIsReturned) {
DebugManagerStateRestore restorer;
debugManager.flags.CreateMultipleRootDevices.set(3);
@@ -386,4 +450,4 @@ HWTEST_F(DeviceFactoryOverrideTest, GivenAubModeWhenValidateDeviceFlagsThenIsPro
EXPECT_FALSE(hasSubstr(capturedStderr, expectedMissingProductFamilyStderrSubstr));
EXPECT_FALSE(hasSubstr(capturedStderr, expectedMissingHardwareInfoStderrSubstr));
}
}
}