fix: Fail device init if kernel debugging is misconfigured

Also print an error message to stderr when kernel debug attach is not available for the device.

Related-to: GSD-10780

Signed-off-by: Brandon Yates <brandon.yates@intel.com>
This commit is contained in:
Brandon Yates 2025-03-07 13:45:49 +00:00 committed by Compute-Runtime-Automation
parent 80168b194f
commit b0c92ea425
7 changed files with 44 additions and 10 deletions

View File

@ -138,7 +138,9 @@ bool Device::createDeviceImpl() {
}
// initialize common resources once
initializeCommonResources();
if (!initializeCommonResources()) {
return false;
}
}
// create engines
@ -172,15 +174,14 @@ bool Device::initDeviceWithEngines() {
return createEngines();
}
void Device::initializeCommonResources() {
bool Device::initializeCommonResources() {
if (getExecutionEnvironment()->isDebuggingEnabled()) {
const auto rootDeviceIndex = getRootDeviceIndex();
auto rootDeviceEnvironment = getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get();
rootDeviceEnvironment->initDebuggerL0(this);
if (rootDeviceEnvironment->debugger == nullptr) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr,
"Debug mode is not enabled in the system.\n");
getExecutionEnvironment()->setDebuggingMode(DebuggingMode::disabled);
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Debug mode is not enabled in the system.\n");
return false;
}
}
@ -209,6 +210,7 @@ void Device::initializeCommonResources() {
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
}
initUsmReuseMaxSize();
return true;
}
void Device::initUsmReuseMaxSize() {

View File

@ -277,7 +277,7 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
MOCKABLE_VIRTUAL bool createDeviceImpl();
bool initDeviceWithEngines();
void initializeCommonResources();
bool initializeCommonResources();
bool initDeviceFully();
void initUsmReuseMaxSize();
virtual bool createEngines();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -19,7 +19,12 @@ std::unique_ptr<NEO::Debugger> DebuggerL0::create(NEO::Device *device) {
return nullptr;
}
auto osInterface = device->getRootDeviceEnvironment().osInterface.get();
if (!osInterface || !osInterface->isDebugAttachAvailable()) {
if (!osInterface) {
return nullptr;
}
if (!osInterface->isDebugAttachAvailable()) {
auto cardName = osInterface->getDriverModel()->as<Drm>()->getSysFsPciPathBaseName();
IoFunctions::fprintf(stderr, "Kernel debug mode is not enabled for %s. Device is not available for use\n", cardName.c_str());
return nullptr;
}

View File

@ -179,6 +179,15 @@ int Drm::getEnabledPooledEu(int &enabled) {
return getParamIoctl(DrmParam::paramHasPooledEu, &enabled);
}
std::string Drm::getSysFsPciPathBaseName() {
auto fullPath = getSysFsPciPath();
size_t pos = fullPath.rfind("/");
if (std::string::npos == pos) {
return fullPath;
}
return fullPath.substr(pos + 1, std::string::npos);
}
std::string Drm::getSysFsPciPath() {
std::string path = std::string(Os::sysFsPciPathPrefix) + hwDeviceId->getPciPath() + "/drm";
std::string expectedFilePrefix = path + "/card";

View File

@ -264,6 +264,7 @@ class Drm : public DriverModel {
void cleanup() override;
bool readSysFsAsString(const std::string &relativeFilePath, std::string &readString);
MOCKABLE_VIRTUAL std::string getSysFsPciPath();
MOCKABLE_VIRTUAL std::string getSysFsPciPathBaseName();
std::unique_ptr<HwDeviceIdDrm> &getHwDeviceId() { return hwDeviceId; }
template <typename DataType>

View File

@ -1983,7 +1983,7 @@ TEST_F(DeviceTests, GivenDebuggingEnabledWhenDeviceIsInitializedThenL0DebuggerIs
EXPECT_NE(nullptr, device->getL0Debugger());
}
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenErrorIsPrintedButNotReturned) {
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenDeviceIsNullAndErrorIsPrinted) {
extern bool forceCreateNullptrDebugger;
VariableBackup backupForceCreateNullptrDebugger{&forceCreateNullptrDebugger, true};
@ -1998,7 +1998,7 @@ TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsIniti
auto output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string("Debug mode is not enabled in the system.\n"), output);
EXPECT_EQ(nullptr, device->getL0Debugger());
EXPECT_EQ(nullptr, device);
}
TEST_F(DeviceTests, givenDebuggerRequestedByUserWhenDeviceWithSubDevicesCreatedThenInitializeDebuggerOncePerRootDevice) {

View File

@ -2267,3 +2267,20 @@ TEST(DrmTest, GivenProductSpecificIoctlHelperAvailableAndDebugFlagToIgnoreIsSetW
EXPECT_EQ(0u, customFuncCalled);
}
// Checks that getSysFsPciPathBaseName() yields the trailing component of whatever
// getSysFsPciPath() reports, including a path that has no '/' in it.
TEST(DrmTest, GivenSysFsPciPathWhenCallinggetSysFsPciPathBaseNameThenResultIsCorrect) {
    // Mock whose sysfs PCI path is dictated by the test through mockSysFsPciPath.
    class DrmMockPciPath : public DrmMock {
      public:
        DrmMockPciPath(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {}
        std::string getSysFsPciPath() override { return mockSysFsPciPath; }
        std::string mockSysFsPciPath = "/sys/devices/pci0000:00/0000:00:02.0/drm/card0";
    };

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    DrmMockPciPath drm{*executionEnvironment->rootDeviceEnvironments[0]};

    // Default mock path: full sysfs path ending in card0.
    EXPECT_STREQ("card0", drm.getSysFsPciPathBaseName().c_str());

    // Same directory, different card index.
    drm.mockSysFsPciPath = "/sys/devices/pci0000:00/0000:00:02.0/drm/card7";
    EXPECT_STREQ("card7", drm.getSysFsPciPathBaseName().c_str());

    // No separator at all: the path is expected back unchanged.
    drm.mockSysFsPciPath = "card8";
    EXPECT_STREQ("card8", drm.getSysFsPciPathBaseName().c_str());
}