fix(sysman): Changes in the Device Reset Implementation

- The unbind and bind operations in the device reset are now used
for the FLR and Cold Reset and not for Warm reset.

Related-To: NEO-10452

Signed-off-by: Bari, Pratik <pratik.bari@intel.com>
This commit is contained in:
Bari, Pratik
2024-02-23 09:10:26 +00:00
committed by Compute-Runtime-Automation
parent c3d884e548
commit 317e4d84d4
4 changed files with 62 additions and 22 deletions

View File

@@ -311,11 +311,12 @@ ze_result_t LinuxGlobalOperationsImp::resetImpl(ze_bool_t force, zes_reset_type_
std::string flrPath = resetName + functionLevelReset;
resetName = pFsAccess->getBaseName(resetName);
// Unbind the device from the kernel driver.
result = pSysfsAccess->unbindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device:%s and returning error:0x%x \n", __FUNCTION__, resetName.c_str(), result);
return result;
if (resetType == ZES_RESET_TYPE_FLR || resetType == ZES_RESET_TYPE_COLD) {
result = pSysfsAccess->unbindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device:%s and returning error:0x%x \n", __FUNCTION__, resetName.c_str(), result);
return result;
}
}
std::vector<::pid_t> processes;
@@ -372,11 +373,12 @@ ze_result_t LinuxGlobalOperationsImp::resetImpl(ze_bool_t force, zes_reset_type_
return result;
}
// Rebind the device to the kernel driver.
result = pSysfsAccess->bindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to rebind the device to the kernel driver and returning error:0x%x \n", __FUNCTION__, result);
return result;
if (resetType == ZES_RESET_TYPE_FLR || resetType == ZES_RESET_TYPE_COLD) {
result = pSysfsAccess->bindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to bind the device to the kernel driver and returning error:0x%x \n", __FUNCTION__, result);
return result;
}
}
return pLinuxSysmanImp->reInitSysmanDeviceResources();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023 Intel Corporation
* Copyright (C) 2023-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -798,6 +798,25 @@ TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessI
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
TEST_F(SysmanGlobalOperationsFixture,
GivenPermissionDeniedWhenCallingGetDeviceStateThenZeResultErrorInsufficientPermissionsIsReturned) {
pSysfsAccess->isRootSet = false;
ze_result_t result = zesDeviceReset(device, true);
EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result);
}
TEST_F(SysmanGlobalOperationsFixture, GivenDeviceInUseWhenCallingResetThenZeResultErrorHandleObjectInUseIsReturned) {
pProcfsAccess->ourDevicePid = pProcfsAccess->extraPid;
pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;
pProcfsAccess->mockListProcessCall.push_back(DEVICE_IN_USE);
pProcfsAccess->isRepeated.push_back(true);
ze_result_t result = zesDeviceReset(device, false);
EXPECT_EQ(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, result);
}
TEST_F(SysmanGlobalOperationsIntegratedFixture,
GivenPermissionDeniedWhenCallingGetDeviceStateThenZeResultErrorInsufficientPermissionsIsReturned) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -231,11 +231,12 @@ ze_result_t LinuxGlobalOperationsImp::resetImpl(ze_bool_t force, zes_reset_type_
std::string flrPath = resetName + functionLevelReset;
resetName = pFsAccess->getBaseName(resetName);
// Unbind the device from the kernel driver.
result = pSysfsAccess->unbindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device:%s and returning error:0x%x \n", __FUNCTION__, resetName.c_str(), result);
return result;
if (resetType == ZES_RESET_TYPE_FLR || resetType == ZES_RESET_TYPE_COLD) {
result = pSysfsAccess->unbindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to unbind device:%s and returning error:0x%x \n", __FUNCTION__, resetName.c_str(), result);
return result;
}
}
std::vector<::pid_t> processes;
@@ -292,10 +293,12 @@ ze_result_t LinuxGlobalOperationsImp::resetImpl(ze_bool_t force, zes_reset_type_
return result;
}
result = pSysfsAccess->bindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to bind the device to the kernel driver and returning error:0x%x \n", __FUNCTION__, result);
return result;
if (resetType == ZES_RESET_TYPE_FLR || resetType == ZES_RESET_TYPE_COLD) {
result = pSysfsAccess->bindDevice(resetName);
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Error@ %s(): Failed to bind the device to the kernel driver and returning error:0x%x \n", __FUNCTION__, result);
return result;
}
}
return pLinuxSysmanImp->initDevice();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2023 Intel Corporation
* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -805,6 +805,22 @@ TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessI
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
TEST_F(SysmanGlobalOperationsFixture, GivenPermissionDeniedWhenCallingGetDeviceStateThenZeResultErrorInsufficientPermissionsIsReturned) {
pSysfsAccess->isRootSet = false;
ze_result_t result = zesDeviceReset(device, true);
EXPECT_EQ(ZE_RESULT_ERROR_INSUFFICIENT_PERMISSIONS, result);
}
TEST_F(SysmanGlobalOperationsFixture, GivenDeviceInUseWhenCallingResetThenZeResultErrorHandleObjectInUseIsReturned) {
pProcfsAccess->ourDevicePid = pProcfsAccess->extraPid;
pProcfsAccess->ourDeviceFd = pProcfsAccess->extraFd;
pProcfsAccess->mockListProcessCall.push_back(DEVICE_IN_USE);
pProcfsAccess->isRepeated.push_back(true);
ze_result_t result = zesDeviceReset(device, false);
EXPECT_EQ(ZE_RESULT_ERROR_HANDLE_OBJECT_IN_USE, result);
}
TEST_F(SysmanGlobalOperationsIntegratedFixture, GivenPermissionDeniedWhenCallingGetDeviceStateThenZeResultErrorInsufficientPermissionsIsReturned) {
pSysfsAccess->isRootSet = false;
ze_result_t result = zesDeviceReset(device, true);