fix: allow fork() after zeInit()

- do not release resources derived from parent process
- zeInit() in child should initialize new driver

Related-To: NEO-11761

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-07-23 12:50:37 +00:00
committed by Compute-Runtime-Automation
parent bdb60afa2b
commit 8a7923c6ee
14 changed files with 118 additions and 6 deletions

View File

@@ -12,6 +12,7 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/os_interface/debug_env_reader.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/source/os_interface/sys_calls_common.h"
#include "shared/source/pin/pin.h"
#include "level_zero/core/source/device/device.h"
@@ -23,6 +24,7 @@
#include "log_manager.h"
#include <memory>
#include <mutex>
#include <thread>
namespace L0 {
@@ -33,6 +35,7 @@ uint32_t driverCount = 0;
void DriverImp::initialize(ze_result_t *result) {
*result = ZE_RESULT_ERROR_UNINITIALIZED;
pid = NEO::SysCalls::getCurrentProcessId();
NEO::EnvironmentVariableReader envReader;
L0EnvVariables envVariables = {};
@@ -133,13 +136,26 @@ ze_result_t driverHandleGet(uint32_t *pCount, ze_driver_handle_t *phDriverHandle
static DriverImp driverImp;
Driver *Driver::driver = &driverImp;
std::mutex driverInitMutex;
ze_result_t init(ze_init_flags_t flags) {
if (flags && !(flags & ZE_INIT_FLAG_GPU_ONLY)) {
L0::levelZeroDriverInitialized = false;
return ZE_RESULT_ERROR_UNINITIALIZED;
} else {
auto pid = NEO::SysCalls::getCurrentProcessId();
ze_result_t result = Driver::get()->driverInit(flags);
if (Driver::get()->getPid() != pid) {
std::lock_guard<std::mutex> lock(driverInitMutex);
if (Driver::get()->getPid() != pid) {
ze_result_t result;
Driver::get()->initialize(&result);
}
}
if (result == ZE_RESULT_SUCCESS) {
L0::levelZeroDriverInitialized = true;
} else {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -16,6 +16,8 @@ struct Driver {
static Driver *get() { return driver; }
virtual ~Driver() = default;
virtual unsigned int getPid() const = 0;
protected:
static Driver *driver;
};

View File

@@ -227,6 +227,8 @@ void DriverHandleImp::updateRootDeviceBitFields(std::unique_ptr<NEO::Device> &ne
ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>> neoDevices) {
bool multiOsContextDriver = false;
this->pid = NEO::SysCalls::getCurrentProcessId();
for (auto &neoDevice : neoDevices) {
ze_result_t returnValue = ZE_RESULT_SUCCESS;
if (!neoDevice->getHardwareInfo().capabilityTable.levelZeroSupported) {

View File

@@ -144,6 +144,7 @@ struct DriverHandleImp : public DriverHandle {
static const std::vector<std::pair<std::string, uint32_t>> extensionsSupported;
uint64_t uuidTimestamp = 0u;
unsigned int pid = 0;
NEO::MemoryManager *memoryManager = nullptr;
NEO::SVMAllocsManager *svmAllocsManager = nullptr;

View File

@@ -19,8 +19,12 @@ class DriverImp : public Driver {
ze_result_t driverInit(ze_init_flags_t flags) override;
void initialize(ze_result_t *result) override;
// Returns the process id currently stored in this driver's `pid` member.
unsigned int getPid() const override { return pid; }
protected:
uint32_t pid = 0;
std::once_flag initDriverOnce;
static ze_result_t initStatus;
};

View File

@@ -1,10 +1,12 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/sys_calls_common.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/sysman/source/driver/sysman_driver_handle_imp.h"
@@ -12,7 +14,10 @@ namespace L0 {
void globalDriverTeardown() {
if (globalDriver != nullptr) {
delete globalDriver;
if (globalDriver->pid == NEO::SysCalls::getCurrentProcessId()) {
delete globalDriver;
}
globalDriver = nullptr;
}
if (Sysman::globalSysmanDriver != nullptr) {

View File

@@ -1,11 +1,13 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/source/os_interface/sys_calls_common.h"
#include "level_zero/core/source/driver/driver_imp.h"
#include "level_zero/core/test/unit_tests/mock.h"
#include "level_zero/core/test/unit_tests/white_box.h"
@@ -17,6 +19,7 @@ namespace ult {
// White-box specialization for DriverImp: re-exports the protected `pid`
// member so unit tests can read and overwrite the stored process id.
template <>
struct WhiteBox<::L0::DriverImp> : public ::L0::DriverImp {
using ::L0::DriverImp::pid;
};
using Driver = WhiteBox<::L0::DriverImp>;
@@ -28,12 +31,27 @@ struct Mock<Driver> : public Driver {
// Mocked driverInit: counts invocations, stores the current process id on the
// first invocation only, and returns a result controlled by failInitDriver.
ze_result_t driverInit(ze_init_flags_t flag) override {
    const bool isFirstCall = (++initCalledCount == 1);
    if (isFirstCall) {
        pid = NEO::SysCalls::getCurrentProcessId();
    }
    return failInitDriver ? ZE_RESULT_ERROR_UNINITIALIZED : ZE_RESULT_SUCCESS;
}
// Mocked initialize: stores the current process id and reports the outcome
// through `result`.
//   result - out-parameter; receives ZE_RESULT_ERROR_UNINITIALIZED when
//            failInitDriver is set, ZE_RESULT_SUCCESS otherwise.
void initialize(ze_result_t *result) override {
    pid = NEO::SysCalls::getCurrentProcessId();
    // Select exactly one outcome. The previous form wrote the failure code and
    // then unconditionally overwrote it with success, so the mock could never
    // report a failed initialization.
    *result = failInitDriver ? ZE_RESULT_ERROR_UNINITIALIZED : ZE_RESULT_SUCCESS;
}
Driver *previousDriver = nullptr;
uint32_t initCalledCount = 0;
bool failInitDriver = false;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,10 +8,12 @@
#include "level_zero/core/test/unit_tests/os_interface/global_teardown_tests.h"
#include "shared/source/os_interface/os_library.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/driver/driver.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/source/driver/driver_imp.h"
#include "level_zero/core/source/global_teardown.h"
#include "level_zero/sysman/source/driver/sysman_driver_handle_imp.h"
@@ -51,5 +53,28 @@ TEST(GlobalTearDownTests, givenCallToGlobalTearDownFunctionWithNullSysManDriverT
EXPECT_EQ(globalDriver, nullptr);
EXPECT_EQ(Sysman::globalSysmanDriver, nullptr);
}
// Simulates teardown in a forked child: when the pid stored in the global
// driver differs from the current process id, globalDriverTeardown() must
// reset the global pointer without deleting the object it points to.
TEST(GlobalTearDownTests, givenForkedProcessWhenGlobalTearDownFunctionCalledThenGlobalDriverIsNotDeleted) {
    VariableBackup<uint32_t> driverCountBackup{&L0::driverCount};
    VariableBackup<_ze_driver_handle_t *> globalDriverHandleBackup{&L0::globalDriverHandle};

    ze_result_t result = ZE_RESULT_ERROR_UNINITIALIZED;
    DriverImp driverImp;
    driverImp.initialize(&result);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    // Fatal assertion: globalDriver is dereferenced right below, so a null
    // pointer must stop this test instead of crashing the whole test binary.
    ASSERT_NE(L0::globalDriver, nullptr);

    // change pid in driver so it no longer matches the current process
    L0::globalDriver->pid = L0::globalDriver->pid + 5;

    // keep the pointer: teardown is expected to skip the delete, so the test
    // has to release the object itself
    auto tempDriver = L0::globalDriver;

    globalDriverTeardown();
    EXPECT_EQ(L0::globalDriver, nullptr);
    EXPECT_EQ(Sysman::globalSysmanDriver, nullptr);

    delete tempDriver;
}
} // namespace ult
} // namespace L0

View File

@@ -98,6 +98,28 @@ TEST(zeInit, whenCallingZeInitWithoutGpuOnlyFlagThenInitializeOnDriverIsNotCalle
EXPECT_EQ(0u, driver.initCalledCount);
}
// Calls zeInit twice; before the second call the pid stored in the mock driver
// is changed so it no longer equals the current process id. Expects both calls
// to succeed, two driverInit invocations, and the stored pid to be refreshed.
TEST(zeInit, givenZeInitCalledWhenCallingZeInitInForkedProcessThenNewDriverIsInitialized) {
    Mock<Driver> driver;
    driver.pid = NEO::SysCalls::getCurrentProcessId();

    const auto firstInitResult = zeInit(ZE_INIT_FLAG_GPU_ONLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, firstInitResult);

    // make the stored pid differ from the pid of the running process
    driver.pid = NEO::SysCalls::getCurrentProcessId() - 1;

    const auto secondInitResult = zeInit(ZE_INIT_FLAG_GPU_ONLY);
    EXPECT_EQ(ZE_RESULT_SUCCESS, secondInitResult);
    EXPECT_TRUE(levelZeroDriverInitialized);
    EXPECT_EQ(2u, driver.initCalledCount);

    // the stored pid must be back in sync with the current process
    const auto currentPid = NEO::SysCalls::getCurrentProcessId();
    EXPECT_EQ(currentPid, driver.pid);
}
using DriverHandleImpTest = Test<DeviceFixture>;
TEST_F(DriverHandleImpTest, givenDriverImpWhenCallingupdateRootDeviceBitFieldsThendeviceBitfieldsAreUpdatedInAccordanceWithNeoDevice) {
auto hwInfo = *NEO::defaultHwInfo;