fix: Skip adding device to list if context creation fails

Propagate error codes from ioctl failures up through the layers
so that we skip exposing bad root devices.

Related-To: NEO-7709

Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@intel.com>
This commit is contained in:
Aravind Gopalakrishnan 2023-02-11 22:03:06 +00:00 committed by Compute-Runtime-Automation
parent 95bea7c92b
commit d75c4d3ec7
24 changed files with 132 additions and 33 deletions

View File

@ -215,7 +215,9 @@ CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::
osContext.setUmdPowerHintValue(driverHandleImp->powerHint);
osContext.reInitializeContext();
}
csr->initializeResources();
csr->initDirectSubmission();
return commandQueue;
}

View File

@ -610,7 +610,7 @@ TEST_F(DrmTests, GivenFailOnContextCreateWhenCreatingDrmThenDrmIsCreated) {
auto drm = DrmWrap::createDrm(*rootDeviceEnvironment);
EXPECT_NE(drm, nullptr);
failOnContextCreate = -1;
EXPECT_THROW(drm->createDrmContext(1, false, false), std::exception);
EXPECT_EQ(-1, drm->createDrmContext(1, false, false));
EXPECT_FALSE(drm->isPreemptionSupported());
failOnContextCreate = 0;
}

View File

@ -266,15 +266,19 @@ void CommandStreamReceiver::fillReusableAllocationsList() {
}
}
void CommandStreamReceiver::initializeResources() {
bool CommandStreamReceiver::initializeResources() {
if (!resourcesInitialized) {
auto lock = obtainUniqueOwnership();
if (!resourcesInitialized) {
osContext->ensureContextInitialized();
if (!osContext->ensureContextInitialized()) {
return false;
}
this->fillReusableAllocationsList();
this->resourcesInitialized = true;
}
}
return true;
}
MemoryManager *CommandStreamReceiver::getMemoryManager() const {

View File

@ -223,7 +223,7 @@ class CommandStreamReceiver {
virtual void fillReusableAllocationsList();
virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; }
OsContext &getOsContext() const { return *osContext; }
void initializeResources();
bool initializeResources();
TagAllocatorBase *getEventTsAllocator();
TagAllocatorBase *getEventPerfCountAllocator(const uint32_t tagSize);
virtual TagAllocatorBase *getTimestampPacketAllocator() = 0;

View File

@ -370,7 +370,9 @@ bool Device::createEngine(uint32_t deviceCsrIndex, EngineTypeUsage engineTypeUsa
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext(commandStreamReceiver.get(), engineDescriptor);
commandStreamReceiver->setupContext(*osContext);
if (osContext->isImmediateContextInitializationEnabled(isDefaultEngine)) {
commandStreamReceiver->initializeResources();
if (!commandStreamReceiver->initializeResources()) {
return false;
}
}
if (!commandStreamReceiver->initializeTagAllocation()) {

View File

@ -53,8 +53,6 @@ bool RootDevice::createEngines() {
}
void RootDevice::initializeRootCommandStreamReceiver() {
rootCsrCreated = true;
std::unique_ptr<CommandStreamReceiver> rootCommandStreamReceiver(createCommandStream(*executionEnvironment, rootDeviceIndex, getDeviceBitfield()));
auto &hwInfo = getHardwareInfo();
@ -67,6 +65,7 @@ void RootDevice::initializeRootCommandStreamReceiver() {
rootCommandStreamReceiver->setupContext(*osContext);
rootCommandStreamReceiver->initializeResources();
rootCsrCreated = true;
rootCommandStreamReceiver->initializeTagAllocation();
rootCommandStreamReceiver->createGlobalFenceAllocation();
rootCommandStreamReceiver->createWorkPartitionAllocation(*this);

View File

@ -339,7 +339,7 @@ void Drm::setUnrecoverableContext(uint32_t drmContextId) {
ioctlHelper->ioctl(DrmIoctl::GemContextSetparam, &contextParam);
}
uint32_t Drm::createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested) {
int Drm::createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested) {
GemContextCreateExt gcc{};
if (DebugManager.flags.DirectSubmissionDrmContext.get() != -1) {
@ -371,7 +371,11 @@ uint32_t Drm::createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequeste
}
auto ioctlResult = ioctlHelper->ioctl(DrmIoctl::GemContextCreateExt, &gcc);
UNRECOVERABLE_IF(ioctlResult != 0);
if (ioctlResult < 0) {
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "WARNING: GemContextCreateExt ioctl failed. Not exposing this root device\n");
return ioctlResult;
}
return gcc.contextId;
}

View File

@ -107,7 +107,7 @@ class Drm : public DriverModel {
int queryAdapterBDF();
int createDrmVirtualMemory(uint32_t &drmVmId);
void destroyDrmVirtualMemory(uint32_t drmVmId);
MOCKABLE_VIRTUAL uint32_t createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested);
MOCKABLE_VIRTUAL int createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested);
void destroyDrmContext(uint32_t drmContextId);
int queryVmId(uint32_t drmContextId, uint32_t &vmId);
void setLowPriorityContextParam(uint32_t drmContextId);

View File

@ -82,12 +82,16 @@ void IoctlHelper::logExecBuffer(const ExecBuffer &execBuffer, std::stringstream
<< " }\n";
}
uint32_t IoctlHelper::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) {
int IoctlHelper::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) {
const auto numberOfCCS = drm.getRootDeviceEnvironment().getHardwareInfo()->gtSystemInfo.CCSInfo.NumberOfCCSEnabled;
const bool debuggableContext = drm.isContextDebugSupported() && drm.getRootDeviceEnvironment().executionEnvironment.isDebuggingEnabled() && !osContext.isInternalEngine();
const bool debuggableContextCooperative = debuggableContext && numberOfCCS > 0;
auto drmContextId = drm.createDrmContext(drmVmId, drm.isVmBindAvailable(), osContext.isCooperativeEngine() || debuggableContextCooperative);
if (drmContextId < 0) {
return drmContextId;
}
if (drm.areNonPersistentContextsSupported()) {
drm.setNonPersistentContext(drmContextId);
}

View File

@ -119,7 +119,7 @@ class IoctlHelper {
virtual bool checkIfIoctlReinvokeRequired(int error, DrmIoctl ioctlRequest) const;
virtual std::vector<MemoryRegion> translateToMemoryRegions(const std::vector<uint8_t> &regionInfo);
virtual uint32_t createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex);
virtual int createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex);
std::vector<EngineCapabilities> translateToEngineCaps(const std::vector<uint8_t> &data);
void fillExecObject(ExecObject &execObject, uint32_t handle, uint64_t gpuAddress, uint32_t drmContextId, bool bindInfo, bool isMarkedForCapture);

View File

@ -31,7 +31,7 @@ OsContextLinux::OsContextLinux(Drm &drm, uint32_t rootDeviceIndex, uint32_t cont
: OsContext(rootDeviceIndex, contextId, engineDescriptor),
drm(drm) {}
void OsContextLinux::initializeContext() {
bool OsContextLinux::initializeContext() {
auto hwInfo = drm.getRootDeviceEnvironment().getHardwareInfo();
auto defaultEngineType = getChosenEngineType(*hwInfo);
@ -50,6 +50,9 @@ void OsContextLinux::initializeContext() {
if (deviceBitfield.test(deviceIndex)) {
auto drmVmId = drm.getVirtualMemoryAddressSpace(deviceIndex);
auto drmContextId = drm.getIoctlHelper()->createDrmContext(drm, *this, drmVmId, deviceIndex);
if (drmContextId < 0) {
return false;
}
this->drmContextIds.push_back(drmContextId);
@ -63,6 +66,7 @@ void OsContextLinux::initializeContext() {
}
}
}
return true;
}
bool OsContextLinux::isDirectSubmissionSupported(const HardwareInfo &hwInfo) const {

View File

@ -51,7 +51,7 @@ class OsContextLinux : public OsContext {
}
protected:
void initializeContext() override;
bool initializeContext() override;
std::atomic<uint32_t> tlbFlushCounter{0};
std::atomic<uint32_t> lastFlushedTlbFlushCounter{0};

View File

@ -1081,7 +1081,7 @@ void IoctlHelperXe::xeShowBindTable() {
#endif
}
uint32_t IoctlHelperXe::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) {
int IoctlHelperXe::createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) {
struct drm_xe_engine_create create = {};
uint32_t drmContextId = 0;
struct drm_xe_engine_class_instance *currentEngine = nullptr;

View File

@ -85,7 +85,7 @@ class IoctlHelperXe : public IoctlHelper {
int getDrmParamValue(DrmParam drmParam) const override;
int getDrmParamValueBase(DrmParam drmParam) const override;
std::string getIoctlString(DrmIoctl ioctlRequest) const override;
uint32_t createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) override;
int createDrmContext(Drm &drm, OsContextLinux &osContext, uint32_t drmVmId, uint32_t deviceIndex) override;
std::string getDrmParamString(DrmParam param) const override;
std::string getFileForMaxGpuFrequency() const override;

View File

@ -45,7 +45,7 @@ bool OsContext::isImmediateContextInitializationEnabled(bool isDefaultEngine) co
return false;
}
void OsContext::ensureContextInitialized() {
bool OsContext::ensureContextInitialized() {
std::call_once(contextInitializedFlag, [this] {
if (DebugManager.flags.PrintOsContextInitializations.get()) {
printf("OsContext initialization: contextId=%d usage=%s type=%s isRootDevice=%d\n",
@ -55,9 +55,13 @@ void OsContext::ensureContextInitialized() {
static_cast<int>(rootDevice));
}
initializeContext();
contextInitialized = true;
if (!initializeContext()) {
contextInitialized = false;
} else {
contextInitialized = true;
}
});
return contextInitialized;
}
bool OsContext::isDirectSubmissionAvailable(const HardwareInfo &hwInfo, bool &submitOnInit) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -24,7 +24,7 @@ class OsContext : public ReferenceTrackedObject<OsContext> {
bool isImmediateContextInitializationEnabled(bool isDefaultEngine) const;
bool isInitialized() const { return contextInitialized; }
void ensureContextInitialized();
bool ensureContextInitialized();
uint32_t getContextId() const { return contextId; }
uint32_t getNumSupportedDevices() const { return numSupportedDevices; }
@ -57,7 +57,7 @@ class OsContext : public ReferenceTrackedObject<OsContext> {
uint32_t getRootDeviceIndex() { return rootDeviceIndex; }
protected:
virtual void initializeContext() {}
virtual bool initializeContext() { return true; }
const uint32_t rootDeviceIndex;
const uint32_t contextId;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -28,7 +28,7 @@ OsContextWin::OsContextWin(Wddm &wddm, uint32_t rootDeviceIndex, uint32_t contex
wddm(wddm) {
}
void OsContextWin::initializeContext() {
bool OsContextWin::initializeContext() {
if (wddm.getRootDeviceEnvironment().executionEnvironment.isDebuggingEnabled()) {
debuggableContext = wddm.getRootDeviceEnvironment().osInterface->isDebugAttachAvailable() && !isInternalEngine();
@ -43,6 +43,8 @@ void OsContextWin::initializeContext() {
residencyController.registerCallback();
UNRECOVERABLE_IF(!residencyController.isInitialized());
return true;
};
void OsContextWin::reInitializeContext() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2022 Intel Corporation
* Copyright (C) 2018-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -39,7 +39,7 @@ class OsContextWin : public OsContext {
uint32_t getDeviceNodeMask();
protected:
void initializeContext() override;
bool initializeContext() override;
WddmResidencyController residencyController;
HardwareQueue hardwareQueue;

View File

@ -112,7 +112,7 @@ class DrmMock : public Drm {
queryPageFaultSupportCalled = true;
}
uint32_t createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested) override {
int createDrmContext(uint32_t drmVmId, bool isDirectSubmissionRequested, bool isCooperativeContextRequested) override {
capturedCooperativeContextRequest = isCooperativeContextRequested;
if (callBaseCreateDrmContext) {
return Drm::createDrmContext(drmVmId, isDirectSubmissionRequested, isCooperativeContextRequested);

View File

@ -18,6 +18,7 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/os_interface.h"
#include "shared/source/utilities/tag_allocator.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
@ -111,6 +112,28 @@ HWTEST_F(CommandStreamReceiverTest, WhenInitializeResourcesThenCallFillReusableA
EXPECT_EQ(1u, pDevice->getUltCommandStreamReceiver<FamilyType>().fillReusableAllocationsListCalled);
}
// Verifies that CommandStreamReceiver::initializeResources() returns false when the
// OsContext attached to the CSR fails to initialize.
HWTEST_F(CommandStreamReceiverTest, whenContextCreateReturnsFalseThenExpectCSRInitializeResourcesFail) {
    // OsContext stub whose initializeContext() always fails and counts how often it is invoked.
    struct MyOsContext : OsContext {
        MyOsContext(uint32_t contextId,
                    const EngineDescriptor &engineDescriptor) : OsContext(0, contextId, engineDescriptor) {}

        bool initializeContext() override {
            initializeContextCalled++;
            return false;
        }

        size_t initializeContextCalled = 0u;
    };

    const EngineTypeUsage engineTypeUsageRegular{aub_stream::ENGINE_RCS, EngineUsage::Regular};
    MyOsContext osContext{0, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsageRegular)};
    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // The CSR is obtained from the fixture's device, while osContext is a local of this test body.
    // setupContext() stores the address of the context it is given, so remember the CSR's
    // original state and put it back before osContext goes out of scope.
    auto &originalOsContext = ultCsr.getOsContext();
    const bool originalResourcesInitialized = ultCsr.resourcesInitialized;

    ultCsr.resourcesInitialized = false;
    ultCsr.setupContext(osContext);

    bool ret = ultCsr.initializeResources();
    EXPECT_FALSE(ret);
    // A failed context initialization must not mark the CSR resources as initialized.
    EXPECT_FALSE(ultCsr.resourcesInitialized);
    EXPECT_EQ(1u, osContext.initializeContextCalled);

    // Do not leave the CSR pointing at the soon-to-be-destroyed local context.
    ultCsr.setupContext(originalOsContext);
    ultCsr.resourcesInitialized = originalResourcesInitialized;
}
HWTEST_F(CommandStreamReceiverTest, givenCsrWhenCallFillReusableAllocationsListThenAllocateCommandBufferAndMakeItResident) {
DebugManagerStateRestore stateRestore;
DebugManager.flags.SetAmountOfReusableAllocations.set(1);

View File

@ -108,7 +108,7 @@ TEST(DrmQueryTest, givenCreateContextWithAccessCountersWhenDrmContextIsCreatedTh
DrmQueryMock drm{*executionEnvironment->rootDeviceEnvironments[0]};
auto ret = drm.createDrmContext(0, false, false);
EXPECT_EQ(0u, ret);
EXPECT_EQ(0, ret);
EXPECT_TRUE(drm.receivedContextCreateFlags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS);
@ -130,6 +130,15 @@ TEST(DrmQueryTest, givenCreateContextWithAccessCountersWhenDrmContextIsCreatedTh
EXPECT_EQ(static_cast<uint8_t>(DrmPrelimHelper::getContextAcgValues()[1]), paramAcc->granularity);
}
// Verifies that Drm::createDrmContext() returns the negative ioctl result (-1) instead of
// aborting when the mock is configured with a failing stored return value.
TEST(DrmQueryTest, GivenDrmWhenAskedForContextThatFailsThenFalseIsReturned) {
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    // Owned by a unique_ptr so the mock is released on every exit path; it is declared after
    // executionEnvironment and therefore destroyed before it, as in the manual delete version.
    auto drm = std::make_unique<DrmMock>(*executionEnvironment->rootDeviceEnvironments[0]);

    drm->storedRetVal = -1;
    EXPECT_EQ(-1, drm->createDrmContext(1, false, false));

    // NOTE(review): reset kept from the original test; presumably it keeps any calls made
    // while the mock is torn down from failing - confirm against DrmMock.
    drm->storedRetVal = 0;
}
TEST(DrmQueryTest, givenCreateContextWithAccessCounterWhenDrmContextIsCreatedThenProgramAccessCountersWithSpecifiedTriggeringThreshold) {
DebugManagerStateRestore restorer;
DebugManager.flags.CreateContextWithAccessCounters.set(0);
@ -141,7 +150,7 @@ TEST(DrmQueryTest, givenCreateContextWithAccessCounterWhenDrmContextIsCreatedThe
DebugManager.flags.AccessCountersTrigger.set(threshold);
auto ret = drm.createDrmContext(0, false, false);
EXPECT_EQ(0u, ret);
EXPECT_EQ(0, ret);
EXPECT_TRUE(drm.receivedContextCreateFlags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS);
@ -175,7 +184,7 @@ TEST(DrmQueryTest, givenCreateContextWithAccessCounterWhenDrmContextIsCreatedThe
DebugManager.flags.AccessCountersGranularity.set(granularity);
auto ret = drm.createDrmContext(0, false, false);
EXPECT_EQ(0u, ret);
EXPECT_EQ(0, ret);
EXPECT_TRUE(drm.receivedContextCreateFlags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS);

View File

@ -348,11 +348,19 @@ TEST(DrmTest, GivenDrmWhenAskedForContextThatFailsThenFalseIsReturned) {
auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
DrmMock *pDrm = new DrmMock(*executionEnvironment->rootDeviceEnvironments[0]);
pDrm->storedRetVal = -1;
EXPECT_THROW(pDrm->createDrmContext(1, false, false), std::exception);
EXPECT_EQ(-1, pDrm->createDrmContext(1, false, false));
pDrm->storedRetVal = 0;
delete pDrm;
}
// Verifies that Drm::createDrmContext() returns 0 when the mock's stored return value
// is 0, i.e. the success path of the int-returning interface.
TEST(DrmTest, GivenDrmWhenAskedForContextThatIsSuccessThenTrueIsReturned) {
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    // Owned by a unique_ptr instead of raw new/delete; destroyed before executionEnvironment,
    // which matches the order of the previous manual delete.
    auto drm = std::make_unique<DrmMock>(*executionEnvironment->rootDeviceEnvironments[0]);

    drm->storedRetVal = 0;
    EXPECT_EQ(0, drm->createDrmContext(1, false, false));
}
TEST(DrmTest, givenDrmWhenOsContextIsCreatedThenCreateAndDestroyNewDrmOsContext) {
auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
DrmMock drmMock(*executionEnvironment->rootDeviceEnvironments[0]);

View File

@ -8,6 +8,7 @@
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
#include "shared/source/os_interface/linux/os_context_linux.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/os_interface/linux/device_command_stream_fixture.h"
@ -25,3 +26,14 @@ TEST(OSContextLinux, givenReinitializeContextWhenContextIsInitThenContextIsStill
EXPECT_NO_THROW(osContext.reInitializeContext());
EXPECT_NO_THROW(osContext.ensureContextInitialized());
}
// Verifies that OsContextLinux::ensureContextInitialized() returns false and leaves the
// context uninitialized when DRM context creation fails.
TEST(OSContextLinux, givenInitializeContextWhenContextCreateIoctlFailsThenContextNotInitialized) {
    auto executionEnvironment = std::make_unique<MockExecutionEnvironment>();
    // OsContextLinux stores a reference to the Drm it is constructed with, so the Drm must
    // outlive it. Declaring the owning unique_ptr before osContext guarantees osContext is
    // destroyed first; the earlier manual delete ran while osContext was still alive.
    auto drm = std::make_unique<DrmMock>(*executionEnvironment->rootDeviceEnvironments[0]);

    drm->storedRetVal = -1;
    EXPECT_EQ(-1, drm->createDrmContext(1, false, false));

    OsContextLinux osContext(*drm, 0, 0u, EngineDescriptorHelper::getDefaultDescriptor());
    EXPECT_FALSE(osContext.ensureContextInitialized());
    EXPECT_FALSE(osContext.isInitialized());
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2022 Intel Corporation
* Copyright (C) 2019-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -166,13 +166,35 @@ TEST_F(DeferredOsContextCreationTests, givenBlitterEngineWhenCreatingOsContextTh
expectImmediateContextCreation(engineTypeUsageBlitter, false);
}
// Verifies that a failing initializeContext() is reported through the return value of
// ensureContextInitialized() and leaves the OsContext in the uninitialized state.
TEST_F(DeferredOsContextCreationTests, givenEnsureContextInitializeCalledAndReturnsErrorThenOsContextIsNotInitialized) {
    // OsContext stub whose initializeContext() always fails and counts how often it is invoked.
    struct MyOsContext : OsContext {
        MyOsContext(uint32_t contextId,
                    const EngineDescriptor &engineDescriptor) : OsContext(0, contextId, engineDescriptor) {}

        bool initializeContext() override {
            initializeContextCalled++;
            return false;
        }

        size_t initializeContextCalled = 0u;
    };

    MyOsContext osContext{0, EngineDescriptorHelper::getDefaultDescriptor(engineTypeUsageRegular)};
    EXPECT_FALSE(osContext.isInitialized());

    // The boolean result must signal the failure as well as the isInitialized() state.
    EXPECT_FALSE(osContext.ensureContextInitialized());
    EXPECT_FALSE(osContext.isInitialized());
    EXPECT_EQ(1u, osContext.initializeContextCalled);
}
TEST_F(DeferredOsContextCreationTests, givenEnsureContextInitializeCalledMultipleTimesWhenOsContextIsCreatedThenInitializeOnlyOnce) {
struct MyOsContext : OsContext {
MyOsContext(uint32_t contextId,
const EngineDescriptor &engineDescriptor) : OsContext(0, contextId, engineDescriptor) {}
void initializeContext() override {
bool initializeContext() override {
initializeContextCalled++;
return true;
}
size_t initializeContextCalled = 0u;