fix(sysman): Fix the Warm Device Reset

The Workaround added earlier to resize the VF Bar has been removed.

Related-To: NEO-13775

Signed-off-by: Pratik Bari <pratik.bari@intel.com>
This commit is contained in:
Pratik Bari
2025-02-18 06:54:58 +00:00
committed by Compute-Runtime-Automation
parent b33b4233ad
commit 568d90902d
4 changed files with 39 additions and 170 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -341,50 +341,6 @@ void LinuxSysmanImp::clearHPIE(int fd) {
NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds just to make sure the change is propagated.
}
// Function to adjust VF BAR size i.e Modify VF BAR Control register.
// size param is an encoded value described as follows:
// 0 - 1 MB (2^20 bytes)
// 1 - 2 MB (2^21 bytes)
// 2 - 4 MB (2^22 bytes)
// 3 - 8 MB (2^23 bytes)
// .
// .
// .
// b - 2 GB (2^31 bytes)
// 43 - 8 EB (2^63 bytes)
ze_result_t LinuxSysmanImp::resizeVfBar(uint8_t size) {
std::string pciConfigNode;
pciConfigNode = gtDevicePath + "/config";
auto fdConfig = NEO::FileDescriptor(pciConfigNode.c_str(), O_RDWR);
if (fdConfig < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"Config node open failed\n");
return ZE_RESULT_ERROR_UNKNOWN;
}
std::unique_ptr<uint8_t[]> configMemory = std::make_unique<uint8_t[]>(PCI_CFG_SPACE_EXP_SIZE);
memset(configMemory.get(), 0, PCI_CFG_SPACE_EXP_SIZE);
if (this->preadFunction(fdConfig, configMemory.get(), PCI_CFG_SPACE_EXP_SIZE, 0) < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"Read to get config space failed\n");
return ZE_RESULT_ERROR_UNKNOWN;
}
auto reBarCapPos = L0::Sysman::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), true);
if (!reBarCapPos) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"VF BAR capability not found\n");
return ZE_RESULT_ERROR_UNKNOWN;
}
auto barSizePos = reBarCapPos + PCI_REBAR_CTRL + 1; // position of VF(0) BAR SIZE.
if (this->pwriteFunction(fdConfig, &size, 0x01, barSizePos) < 0) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"Write to change VF bar size failed\n");
return ZE_RESULT_ERROR_UNKNOWN;
}
return ZE_RESULT_SUCCESS;
}
// A 'warm reset' is a conventional reset that is triggered across a PCI express link.
// A warm reset is triggered either when a link is forced into electrical idle or
// by sending TS1 and TS2 ordered sets with the hot reset bit set.
@@ -443,33 +399,6 @@ ze_result_t LinuxSysmanImp::osWarmReset() {
}
NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds, allows the rescan to complete on all devices attached to the root port.
// PCIe port driver uses the BIOS allocated VF bars on bootup. A known bug exists in pcie port driver
// and is causing VF bar allocation failure in PCIe port driver after an SBR - https://bugzilla.kernel.org/show_bug.cgi?id=216795
// WA to adjust VF bar size to 2GB. The default VF bar size is 8GB and for 63VFs, 504GB need to be allocated which is failing on SBR.
// When configured VF bar size to 2GB, an allocation of 126GB is successful. This WA resizes VF0 bar to 2GB. Once pcie port driver
// issue is resolved, this WA may not be necessary. Description for 0xb is explained at function definition - resizeVfVar.
if (NEO::debugManager.flags.VfBarResourceAllocationWa.get()) {
if (ZE_RESULT_SUCCESS != (result = resizeVfBar(0xb))) {
return result;
}
result = pFsAccess->write(devicePath + '/' + "remove", "1");
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"Card Bus remove after resizing VF bar failed\n");
return result;
}
NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds to make sure that the config spaces of all devices are saved correctly.
result = pFsAccess->write(rootPortPath + '/' + "rescan", "1");
if (ZE_RESULT_SUCCESS != result) {
NEO::printDebugString(NEO::debugManager.flags.PrintDebugMessages.get(), stdout,
"Rescanning root port failed after resizing VF bar failed\n");
return result;
}
NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds, allows the rescan to complete on all devices attached to the root port.
}
return result;
}