fix: Fail device init if kernel debugging is misconfigured

Also print an error message to stderr that names the affected device.

Related-to: GSD-10780

Signed-off-by: Brandon Yates <brandon.yates@intel.com>
This commit is contained in:
Brandon Yates 2025-03-07 13:45:49 +00:00 committed by Compute-Runtime-Automation
parent 504440fc4d
commit 4651e72b0b
7 changed files with 44 additions and 10 deletions

View File

@ -141,7 +141,9 @@ bool Device::createDeviceImpl() {
}
// initialize common resources once
initializeCommonResources();
if (!initializeCommonResources()) {
return false;
}
}
// create engines
@ -175,15 +177,14 @@ bool Device::initDeviceWithEngines() {
return createEngines();
}
void Device::initializeCommonResources() {
bool Device::initializeCommonResources() {
if (getExecutionEnvironment()->isDebuggingEnabled()) {
const auto rootDeviceIndex = getRootDeviceIndex();
auto rootDeviceEnvironment = getExecutionEnvironment()->rootDeviceEnvironments[rootDeviceIndex].get();
rootDeviceEnvironment->initDebuggerL0(this);
if (rootDeviceEnvironment->debugger == nullptr) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr,
"Debug mode is not enabled in the system.\n");
getExecutionEnvironment()->setDebuggingMode(DebuggingMode::disabled);
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Debug mode is not enabled in the system.\n");
return false;
}
}
@ -211,6 +212,7 @@ void Device::initializeCommonResources() {
deviceBitfields.emplace(getRootDeviceIndex(), getDeviceBitfield());
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
}
return true;
}
void Device::initUsmReuseLimits() {

View File

@ -270,7 +270,7 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
MOCKABLE_VIRTUAL bool createDeviceImpl();
bool initDeviceWithEngines();
void initializeCommonResources();
bool initializeCommonResources();
bool initDeviceFully();
void initUsmReuseLimits();
virtual bool createEngines();

View File

@ -19,7 +19,12 @@ std::unique_ptr<NEO::Debugger> DebuggerL0::create(NEO::Device *device) {
return nullptr;
}
auto osInterface = device->getRootDeviceEnvironment().osInterface.get();
if (!osInterface || !osInterface->isDebugAttachAvailable()) {
if (!osInterface) {
return nullptr;
}
if (!osInterface->isDebugAttachAvailable()) {
auto cardName = osInterface->getDriverModel()->as<Drm>()->getSysFsPciPathBaseName();
IoFunctions::fprintf(stderr, "Kernel debug mode is not enabled for %s. Device is not available for use\n", cardName.c_str());
return nullptr;
}

View File

@ -179,6 +179,15 @@ int Drm::getEnabledPooledEu(int &enabled) {
return getParamIoctl(DrmParam::paramHasPooledEu, &enabled);
}
std::string Drm::getSysFsPciPathBaseName() {
auto fullPath = getSysFsPciPath();
size_t pos = fullPath.rfind("/");
if (std::string::npos == pos) {
return fullPath;
}
return fullPath.substr(pos + 1, std::string::npos);
}
std::string Drm::getSysFsPciPath() {
std::string path = std::string(Os::sysFsPciPathPrefix) + hwDeviceId->getPciPath() + "/drm";
std::string expectedFilePrefix = path + "/card";

View File

@ -264,6 +264,7 @@ class Drm : public DriverModel {
void cleanup() override;
bool readSysFsAsString(const std::string &relativeFilePath, std::string &readString);
MOCKABLE_VIRTUAL std::string getSysFsPciPath();
MOCKABLE_VIRTUAL std::string getSysFsPciPathBaseName();
std::unique_ptr<HwDeviceIdDrm> &getHwDeviceId() { return hwDeviceId; }
template <typename DataType>

View File

@ -2096,7 +2096,7 @@ TEST_F(DeviceTests, GivenDebuggingEnabledWhenDeviceIsInitializedThenL0DebuggerIs
EXPECT_NE(nullptr, device->getL0Debugger());
}
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenErrorIsPrintedButNotReturned) {
TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsInitializedThenDeviceIsNullAndErrorIsPrinted) {
extern bool forceCreateNullptrDebugger;
VariableBackup backupForceCreateNullptrDebugger{&forceCreateNullptrDebugger, true};
@ -2111,7 +2111,7 @@ TEST_F(DeviceTests, givenDebuggerRequestedByUserAndNotAvailableWhenDeviceIsIniti
auto output = testing::internal::GetCapturedStderr();
EXPECT_EQ(std::string("Debug mode is not enabled in the system.\n"), output);
EXPECT_EQ(nullptr, device->getL0Debugger());
EXPECT_EQ(nullptr, device);
}
TEST_F(DeviceTests, givenDebuggerRequestedByUserWhenDeviceWithSubDevicesCreatedThenInitializeDebuggerOncePerRootDevice) {

View File

@ -2269,6 +2269,23 @@ TEST(DrmTest, GivenProductSpecificIoctlHelperAvailableAndDebugFlagToIgnoreIsSetW
EXPECT_EQ(0u, customFuncCalled);
}
// Checks that getSysFsPciPathBaseName() yields the text after the final '/'
// of the sysfs PCI path, and that a path without any '/' comes back unchanged.
TEST(DrmTest, GivenSysFsPciPathWhenCallinggetSysFsPciPathBaseNameThenResultIsCorrect) {
    // Mock whose getSysFsPciPath() returns whatever the test stores in
    // mockSysFsPciPath, so the input to the base-name helper is fully controlled.
    struct DrmMockPciPath : public DrmMock {
        DrmMockPciPath(RootDeviceEnvironment &rootDeviceEnvironment) : DrmMock(rootDeviceEnvironment) {}
        std::string getSysFsPciPath() override { return mockSysFsPciPath; }
        std::string mockSysFsPciPath = "/sys/devices/pci0000:00/0000:00:02.0/drm/card0";
    };

    struct PathCase {
        const char *sysFsPciPath;
        const char *expectedBaseName;
    };
    const PathCase pathCases[] = {
        {"/sys/devices/pci0000:00/0000:00:02.0/drm/card0", "card0"},
        {"/sys/devices/pci0000:00/0000:00:02.0/drm/card7", "card7"},
        {"card8", "card8"}, // no separator: the path itself is the base name
    };

    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    DrmMockPciPath drm{*executionEnvironment->rootDeviceEnvironments[0]};

    for (const auto &pathCase : pathCases) {
        drm.mockSysFsPciPath = pathCase.sysFsPciPath;
        EXPECT_STREQ(pathCase.expectedBaseName, drm.getSysFsPciPathBaseName().c_str());
    }
}
using DrmHwTest = ::testing::Test;
HWTEST_F(DrmHwTest, GivenDrmWhenSetupHardwareInfoCalledThenGfxCoreHelperIsInitializedFromProductHelper) {
DebugManagerStateRestore restore;
@ -2291,4 +2308,4 @@ HWTEST_F(DrmHwTest, GivenDrmWhenSetupHardwareInfoCalledThenGfxCoreHelperIsInitia
drm.setupHardwareInfo(&device, false);
EXPECT_TRUE(raii.mockGfxCoreHelper->initFromProductHelperCalled);
}
}