From 3c072a6cd11a6e4baea3775c36283ceb2ee78117 Mon Sep 17 00:00:00 2001 From: Bellekallu Rajkiran Date: Thu, 1 Jun 2023 11:45:02 +0000 Subject: [PATCH] fix: WA for VF bar resource allocation post Warm reset On warm reset, with the default BAR size set by the BIOS, VF BAR allocation fails because of a bug in the PCI driver, which impacts SRIOV functionality. Resize the VF BAR for successful allocation of the VF BAR post warm reset. Related-To: LOCI-4481 Signed-off-by: Bellekallu Rajkiran --- .../sysman/source/linux/zes_os_sysman_imp.cpp | 78 +++++++- .../sysman/source/linux/zes_os_sysman_imp.h | 1 + .../source/pci/linux/sysman_os_pci_imp.cpp | 21 ++- .../source/pci/linux/sysman_os_pci_imp.h | 3 +- .../linux/test_zes_global_operations.cpp | 3 +- .../source/sysman/linux/os_sysman_imp.cpp | 76 ++++++++ .../tools/source/sysman/linux/os_sysman_imp.h | 1 + .../source/sysman/pci/linux/os_pci_imp.cpp | 21 ++- .../source/sysman/pci/linux/os_pci_imp.h | 4 +- .../linux/mock_zes_sysman_diagnostics.h | 1 + .../linux/test_zes_sysman_diagnostics.cpp | 173 +++++++++++++++++- .../linux/test_zes_global_operations.cpp | 2 + .../debug_settings/debug_variables_base.inl | 1 + shared/test/common/test_files/igdrcl.config | 1 + 14 files changed, 362 insertions(+), 24 deletions(-) diff --git a/level_zero/sysman/source/linux/zes_os_sysman_imp.cpp b/level_zero/sysman/source/linux/zes_os_sysman_imp.cpp index b54a795ee7..964dadab5c 100644 --- a/level_zero/sysman/source/linux/zes_os_sysman_imp.cpp +++ b/level_zero/sysman/source/linux/zes_os_sysman_imp.cpp @@ -20,6 +20,7 @@ #include "level_zero/sysman/source/linux/pmt/sysman_pmt.h" #include "level_zero/sysman/source/linux/pmu/sysman_pmu.h" #include "level_zero/sysman/source/linux/sysman_fs_access.h" +#include "level_zero/sysman/source/pci/linux/sysman_os_pci_imp.h" #include @@ -343,6 +344,56 @@ void LinuxSysmanImp::clearHPIE(int fd) { NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds just to make sure the change is propagated. 
}

+// Adjusts the VF BAR size by writing the BAR size field of the VF Resizable BAR control register
+// found in the PCI config space of the device at gtDevicePath.
+// size param is an encoded value, i.e. the resulting BAR size is 2^(size + 20) bytes:
+// 0x00 - 1 MB (2^20 bytes)
+// 0x01 - 2 MB (2^21 bytes)
+// 0x02 - 4 MB (2^22 bytes)
+// 0x03 - 8 MB (2^23 bytes)
+// ...
+// 0x0b - 2 GB (2^31 bytes)
+// 0x2b - 8 EB (2^63 bytes)
+// Returns ZE_RESULT_ERROR_UNKNOWN on any failure; the config node is closed on every exit path.
+ze_result_t LinuxSysmanImp::resizeVfBar(uint8_t size) {
+    const std::string pciConfigNode = gtDevicePath + "/config";
+    const int fdConfig = this->openFunction(pciConfigNode.c_str(), O_RDWR);
+    if (fdConfig < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Config node open failed\n");
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    std::unique_ptr<uint8_t[]> configMemory = std::make_unique<uint8_t[]>(PCI_CFG_SPACE_EXP_SIZE);
+    memset(configMemory.get(), 0, PCI_CFG_SPACE_EXP_SIZE);
+    if (this->preadFunction(fdConfig, configMemory.get(), PCI_CFG_SPACE_EXP_SIZE, 0) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Read to get config space failed\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked; the read error is what gets reported.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    auto reBarCapPos = L0::Sysman::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), true);
+    if (!reBarCapPos) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "VF BAR capability not found\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked on this error path.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+
+    auto barSizePos = reBarCapPos + PCI_REBAR_CTRL + 1; // position of VF(0) BAR SIZE.
+    if (this->pwriteFunction(fdConfig, &size, 0x01, barSizePos) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Write to change VF bar size failed\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked; the write error is what gets reported.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    if (this->closeFunction(fdConfig) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Config node close failed\n");
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    return ZE_RESULT_SUCCESS;
+}
+
 // A 'warm reset' is a conventional reset that is triggered across a PCI express link.
// A warm reset is triggered either when a link is forced into electrical idle or // by sending TS1 and TS2 ordered sets with the hot reset bit set. @@ -377,8 +428,8 @@ ze_result_t LinuxSysmanImp::osWarmReset() { this->pwriteFunction(fd, &resetValue, 0x01, offset); NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds just to make sure the change is propagated. this->pwriteFunction(fd, &value, 0x01, offset); - NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds to make sure the change is propagated. before rescan is done. + NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds to make sure the change is propagated. before rescan is done. result = pFsAccess->write(rootPortPath + '/' + "rescan", "1"); if (ZE_RESULT_SUCCESS != result) { return result; @@ -390,6 +441,31 @@ ze_result_t LinuxSysmanImp::osWarmReset() { return ZE_RESULT_ERROR_UNKNOWN; } + // PCIe port driver uses the BIOS allocated VF bars on bootup. A known bug exists in pcie port driver + // and is causing VF bar allocation failure in PCIe port driver after an SBR - https://bugzilla.kernel.org/show_bug.cgi?id=216795 + + // WA to adjust VF bar size to 2GB. The default VF bar size is 8GB and for 63VFs, 504GB need to be allocated which is failing on SBR. + // When configured VF bar size to 2GB, an allocation of 126GB is successful. This WA resizes VF0 bar to 2GB. Once pcie port driver + // issue is resolved, this WA may not be necessary. Description for 0xb is explained at function definition - resizeVfBar. 
+ if (NEO::DebugManager.flags.VfBarResourceAllocationWa.get()) { + if (ZE_RESULT_SUCCESS != (result = resizeVfBar(0xb))) { + return result; + } + + result = pFsAccess->write(cardBusPath + '/' + "remove", "1"); + if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout, + "Card Bus remove after resizing VF bar failed\n"); + return result; + } + + result = pFsAccess->write(rootPortPath + '/' + "rescan", "1"); + if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout, + "Rescanning root port failed after resizing VF bar failed\n"); + return result; + } + } return result; } diff --git a/level_zero/sysman/source/linux/zes_os_sysman_imp.h b/level_zero/sysman/source/linux/zes_os_sysman_imp.h index 9d1296510c..6aa5af490d 100644 --- a/level_zero/sysman/source/linux/zes_os_sysman_imp.h +++ b/level_zero/sysman/source/linux/zes_os_sysman_imp.h @@ -91,6 +91,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass { static const std::string deviceDir; void createFwUtilInterface(); void clearHPIE(int fd); + ze_result_t resizeVfBar(uint8_t size); std::mutex fwLock; }; diff --git a/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.cpp b/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.cpp index 8775d617ae..8a2b6e8f6c 100644 --- a/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.cpp +++ b/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.cpp @@ -116,7 +116,7 @@ ze_result_t LinuxPciImp::initializeBarProperties(std::vector 0) { - if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_REBAR) { + if (PCI_EXT_CAP_ID(header) == capId) { return pos; } pos = PCI_EXT_CAP_NEXT(header); if (pos < PCI_CFG_SPACE_SIZE) { return 0; } - header = getDwordFromConfig(pos); + header = getDwordFromConfig(pos, configMemory); } return 0; } @@ -189,14 +192,14 @@ uint16_t LinuxPciImp::getLinkCapabilityPos() { // Parse PCIe configuration space to see if resizable Bar 
is supported bool LinuxPciImp::resizableBarSupported() { - return (getRebarCapabilityPos() > 0); + return (L0::Sysman::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false) > 0); } bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { bool isBarResizable = false; uint32_t capabilityRegister = 0, controlRegister = 0; uint32_t nBars = 1; - auto rebarCapabilityPos = getRebarCapabilityPos(); + auto rebarCapabilityPos = L0::Sysman::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false); // If resizable Bar is not supported then return false. if (!rebarCapabilityPos) { @@ -221,11 +224,11 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { // -------------------------------------------------------------| // Only first Control register(at offset 008h, as shown above), could tell about number of resizable Bars - controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); + controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get()); nBars = BITS(controlRegister, 5, 3); // control register's bits 5,6 and 7 contain number of resizable bars information for (auto barNumber = 0u; barNumber < nBars; barNumber++) { uint32_t barId = 0; - controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); + controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get()); barId = BITS(controlRegister, 0, 3); // Control register's bit 0,1,2 tells the index of bar if (barId == barIndex) { isBarResizable = true; @@ -238,7 +241,7 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { return false; } - capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP); + capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP, configMemory.get()); // Capability register's bit 4 to 31 indicates supported Bar sizes. // In possibleBarSizes, position of each set bit indicates supported bar size. 
Example, if set bit // position of possibleBarSizes is from 0 to n, then this indicates BAR size from 2^0 MB to 2^n MB diff --git a/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.h b/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.h index 2ed09aae77..01a16a935b 100644 --- a/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.h +++ b/level_zero/sysman/source/pci/linux/sysman_os_pci_imp.h @@ -30,6 +30,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { bool resizableBarSupported() override; bool resizableBarEnabled(uint32_t barIndex) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; + static uint32_t getRebarCapabilityPos(uint8_t *configMemory, bool isVfBar); LinuxPciImp() = default; LinuxPciImp(OsSysman *pOsSysman); ~LinuxPciImp() override = default; @@ -51,7 +52,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { static const std::string maxLinkSpeedFile; static const std::string maxLinkWidthFile; bool isIntegratedDevice = false; - uint32_t getDwordFromConfig(uint32_t pos) { + static inline uint32_t getDwordFromConfig(uint32_t pos, uint8_t *configMemory) { return configMemory[pos] | (configMemory[pos + 1] << 8) | (configMemory[pos + 2] << 16) | (configMemory[pos + 3] << 24); } diff --git a/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations.cpp b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations.cpp index b93b5684a1..790a0e2ee9 100644 --- a/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations.cpp +++ b/level_zero/sysman/test/unit_tests/sources/global_operations/linux/test_zes_global_operations.cpp @@ -729,7 +729,8 @@ TEST_F(SysmanGlobalOperationsFixture, GivenGemCreateIoctlFailsWithEINVALWhenCall } TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessIsReturned) { - + DebugManagerStateRestore dbgRestore; + 
DebugManager.flags.VfBarResourceAllocationWa.set(false); ze_result_t result = zesDeviceReset(device, true); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } diff --git a/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp b/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp index 0da9fa1d2f..f62dc7b206 100644 --- a/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp +++ b/level_zero/tools/source/sysman/linux/os_sysman_imp.cpp @@ -18,6 +18,7 @@ #include "level_zero/core/source/driver/driver_handle_imp.h" #include "level_zero/tools/source/sysman/firmware_util/firmware_util.h" #include "level_zero/tools/source/sysman/linux/fs_access.h" +#include "level_zero/tools/source/sysman/pci/linux/os_pci_imp.h" namespace L0 { @@ -378,6 +379,56 @@ void LinuxSysmanImp::clearHPIE(int fd) { NEO::sleep(std::chrono::seconds(10)); // Sleep for 10seconds just to make sure the change is propagated. } +// Function to adjust VF BAR size i.e Modify VF BAR Control register. +// size param is an encoded value described as follows: +// 0 - 1 MB (2^20 bytes) +// 1 - 2 MB (2^21 bytes) +// 2 - 4 MB (2^22 bytes) +// 3 - 8 MB (2^23 bytes) +// . +// . +// . 
+// 0xb - 2 GB (2^31 bytes)
+// 43 (0x2b) - 8 EB (2^63 bytes)
+ze_result_t LinuxSysmanImp::resizeVfBar(uint8_t size) {
+    const std::string pciConfigNode = gtDevicePath + "/config";
+    const int fdConfig = this->openFunction(pciConfigNode.c_str(), O_RDWR);
+    if (fdConfig < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Config node open failed\n");
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    std::unique_ptr<uint8_t[]> configMemory = std::make_unique<uint8_t[]>(PCI_CFG_SPACE_EXP_SIZE);
+    memset(configMemory.get(), 0, PCI_CFG_SPACE_EXP_SIZE);
+    if (this->preadFunction(fdConfig, configMemory.get(), PCI_CFG_SPACE_EXP_SIZE, 0) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Read to get config space failed\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked; the read error is what gets reported.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    auto reBarCapPos = L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), true);
+    if (!reBarCapPos) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "VF BAR capability not found\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked on this error path.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+
+    auto barSizePos = reBarCapPos + PCI_REBAR_CTRL + 1; // position of VF(0) BAR SIZE.
+    if (this->pwriteFunction(fdConfig, &size, 0x01, barSizePos) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Write to change VF bar size failed\n");
+        this->closeFunction(fdConfig); // Best-effort close so the descriptor is not leaked; the write error is what gets reported.
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    if (this->closeFunction(fdConfig) < 0) {
+        NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout,
+                              "Config node close failed\n");
+        return ZE_RESULT_ERROR_UNKNOWN;
+    }
+    return ZE_RESULT_SUCCESS;
+}
+
 // A 'warm reset' is a conventional reset that is triggered across a PCI express link.
 // A warm reset is triggered either when a link is forced into electrical idle or
 // by sending TS1 and TS2 ordered sets with the hot reset bit set.
@@ -436,6 +487,31 @@ ze_result_t LinuxSysmanImp::osWarmReset() { return ZE_RESULT_ERROR_UNKNOWN; } + // PCIe port driver uses the BIOS allocated VF bars on bootup. A known bug exists in pcie port driver + // and is causing VF bar allocation failure in PCIe port driver after an SBR - https://bugzilla.kernel.org/show_bug.cgi?id=216795 + + // WA to adjust VF bar size to 2GB. The default VF bar size is 8GB and for 63VFs, 504GB need to be allocated which is failing on SBR. + // When configured VF bar size to 2GB, an allocation of 126GB is successful. This WA resizes VF0 bar to 2GB. Once pcie port driver + // issue is resolved, this WA may not be necessary. Description for 0xb is explained at function definition - resizeVfBar. + if (NEO::DebugManager.flags.VfBarResourceAllocationWa.get()) { + if (ZE_RESULT_SUCCESS != (result = resizeVfBar(0xb))) { + return result; + } + + result = pFsAccess->write(cardBusPath + '/' + "remove", "1"); + if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout, + "Card Bus remove after resizing VF bar failed\n"); + return result; + } + + result = pFsAccess->write(rootPortPath + '/' + "rescan", "1"); + if (ZE_RESULT_SUCCESS != result) { + NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stdout, + "Rescanning root port failed after resizing VF bar failed\n"); + return result; + } + } return result; } diff --git a/level_zero/tools/source/sysman/linux/os_sysman_imp.h b/level_zero/tools/source/sysman/linux/os_sysman_imp.h index bef9a3a34e..0b2c28f88d 100644 --- a/level_zero/tools/source/sysman/linux/os_sysman_imp.h +++ b/level_zero/tools/source/sysman/linux/os_sysman_imp.h @@ -106,6 +106,7 @@ class LinuxSysmanImp : public OsSysman, NEO::NonCopyableOrMovableClass { SysmanDeviceImp *pParentSysmanDeviceImp = nullptr; static const std::string deviceDir; void clearHPIE(int fd); + ze_result_t resizeVfBar(uint8_t size); std::mutex fwLock; }; diff --git 
a/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp b/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp index 4801b5cdd3..0d129230d6 100644 --- a/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp +++ b/level_zero/tools/source/sysman/pci/linux/os_pci_imp.cpp @@ -117,7 +117,7 @@ ze_result_t LinuxPciImp::initializeBarProperties(std::vector 0) { - if (PCI_EXT_CAP_ID(header) == PCI_EXT_CAP_ID_REBAR) { + if (PCI_EXT_CAP_ID(header) == capId) { return pos; } pos = PCI_EXT_CAP_NEXT(header); if (pos < PCI_CFG_SPACE_SIZE) { return 0; } - header = getDwordFromConfig(pos); + header = getDwordFromConfig(pos, configMemory); } return 0; } @@ -189,14 +192,14 @@ uint16_t LinuxPciImp::getLinkCapabilityPos() { // Parse PCIe configuration space to see if resizable Bar is supported bool LinuxPciImp::resizableBarSupported() { - return (getRebarCapabilityPos() > 0); + return (L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false) > 0); } bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { bool isBarResizable = false; uint32_t capabilityRegister = 0, controlRegister = 0; uint32_t nBars = 1; - auto rebarCapabilityPos = getRebarCapabilityPos(); + auto rebarCapabilityPos = L0::LinuxPciImp::getRebarCapabilityPos(configMemory.get(), false); // If resizable Bar is not supported then return false. 
if (!rebarCapabilityPos) { @@ -221,11 +224,11 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { // -------------------------------------------------------------| // Only first Control register(at offset 008h, as shown above), could tell about number of resizable Bars - controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); + controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get()); nBars = BITS(controlRegister, 5, 3); // control register's bits 5,6 and 7 contain number of resizable bars information for (auto barNumber = 0u; barNumber < nBars; barNumber++) { uint32_t barId = 0; - controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL); + controlRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CTRL, configMemory.get()); barId = BITS(controlRegister, 0, 3); // Control register's bit 0,1,2 tells the index of bar if (barId == barIndex) { isBarResizable = true; @@ -238,7 +241,7 @@ bool LinuxPciImp::resizableBarEnabled(uint32_t barIndex) { return false; } - capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP); + capabilityRegister = getDwordFromConfig(rebarCapabilityPos + PCI_REBAR_CAP, configMemory.get()); // Capability register's bit 4 to 31 indicates supported Bar sizes. // In possibleBarSizes, position of each set bit indicates supported bar size. 
Example, if set bit // position of possibleBarSizes is from 0 to n, then this indicates BAR size from 2^0 MB to 2^n MB diff --git a/level_zero/tools/source/sysman/pci/linux/os_pci_imp.h b/level_zero/tools/source/sysman/pci/linux/os_pci_imp.h index 9782bcdfaa..9a574bcbf6 100644 --- a/level_zero/tools/source/sysman/pci/linux/os_pci_imp.h +++ b/level_zero/tools/source/sysman/pci/linux/os_pci_imp.h @@ -27,6 +27,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { bool resizableBarSupported() override; bool resizableBarEnabled(uint32_t barIndex) override; ze_result_t initializeBarProperties(std::vector &pBarProperties) override; + static uint32_t getRebarCapabilityPos(uint8_t *configMemory, bool isVfBar); LinuxPciImp() = default; LinuxPciImp(OsSysman *pOsSysman); ~LinuxPciImp() override = default; @@ -48,7 +49,7 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { static const std::string maxLinkSpeedFile; static const std::string maxLinkWidthFile; bool isLmemSupported = false; - uint32_t getDwordFromConfig(uint32_t pos) { + static inline uint32_t getDwordFromConfig(uint32_t pos, uint8_t *configMemory) { return configMemory[pos] | (configMemory[pos + 1] << 8) | (configMemory[pos + 2] << 16) | (configMemory[pos + 3] << 24); } @@ -58,7 +59,6 @@ class LinuxPciImp : public OsPci, NEO::NonCopyableOrMovableClass { uint8_t getByteFromConfig(uint32_t pos, uint8_t *configMem) { return configMem[pos]; } - uint32_t getRebarCapabilityPos(); uint16_t getLinkCapabilityPos(); }; diff --git a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h index 27810bf31b..b8d5b499e8 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h +++ b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/mock_zes_sysman_diagnostics.h @@ -19,6 +19,7 @@ const 
std::vector mockSupportedDiagTypes = {"MOCKSUITE1", "MOCKSUIT const std::string deviceDirDiag("device"); const std::string mockRealPathConfig("/sys/devices/pci0000:89/0000:89:02.0/config"); const std::string mockdeviceDirDiag("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"); +const std::string mockdeviceDirConfig("/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0/config"); const std::string mockDeviceName("/MOCK_DEVICE_NAME"); const std::string mockRemove("remove"); const std::string mockRescan("rescan"); diff --git a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp index 415348a0f3..615d85524e 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/diagnostics/linux/test_zes_sysman_diagnostics.cpp @@ -14,10 +14,13 @@ namespace L0 { namespace ult { static int mockFileDescriptor = 123; +static int mockGtPciConfigFd = 124; inline static int openMockDiag(const char *pathname, int flags) { if (strcmp(pathname, mockRealPathConfig.c_str()) == 0) { return mockFileDescriptor; + } else if (strcmp(pathname, mockdeviceDirConfig.c_str()) == 0) { + return mockGtPciConfigFd; } return -1; } @@ -28,8 +31,17 @@ void mockSleepFunctionSecs(int64_t secs) { inline static int openMockDiagFail(const char *pathname, int flags) { return -1; } + +inline static int gtPciConfigOpenFail(const char *pathname, int flags) { + if (strcmp(pathname, mockRealPathConfig.c_str()) == 0) { + return mockFileDescriptor; + } else { + return -1; + } +} + inline static int closeMockDiag(int fd) { - if (fd == mockFileDescriptor) { + if ((fd == mockFileDescriptor) || (fd == mockGtPciConfigFd)) { return 0; } return -1; @@ -38,7 +50,65 @@ inline static int closeMockDiagFail(int fd) { return -1; } +inline 
static int mockGtConfigcloseFail(int fd) { + if (fd == mockGtPciConfigFd) { + return -1; + } + return 0; +} + ssize_t preadMockDiag(int fd, void *buf, size_t count, off_t offset) { + uint8_t *mockBuf = static_cast(buf); + if (fd == mockGtPciConfigFd) { + mockBuf[0x006] = 0x24; + mockBuf[0x034] = 0x40; + mockBuf[0x040] = 0x0d; + mockBuf[0x041] = 0x50; + mockBuf[0x050] = 0x10; + mockBuf[0x051] = 0x70; + mockBuf[0x052] = 0x90; + mockBuf[0x070] = 0x10; + mockBuf[0x071] = 0xac; + mockBuf[0x072] = 0xa0; + mockBuf[0x0ac] = 0x10; + mockBuf[0x0b8] = 0x11; + mockBuf[0x100] = 0x0e; + mockBuf[0x102] = 0x24; + mockBuf[0x103] = 0x42; + mockBuf[0x420] = 0x15; + mockBuf[0x422] = 0x01; + mockBuf[0x423] = 0x22; + mockBuf[0x425] = 0xf0; + mockBuf[0x426] = 0x3f; + mockBuf[0x428] = 0x22; + mockBuf[0x429] = 0x11; + mockBuf[0x220] = 0x24; + mockBuf[0x222] = 0x24; + mockBuf[0x223] = 0x24; + mockBuf[0x320] = 0x10; + mockBuf[0x322] = 0x01; + mockBuf[0x323] = 0x40; + mockBuf[0x400] = 0x18; + mockBuf[0x402] = 0x01; + } + return count; +} + +ssize_t mockGtConfigPreadInvalid(int fd, void *buf, size_t count, off_t offset) { + return count; +} + +ssize_t mockGtConfigPreadFail(int fd, void *buf, size_t count, off_t offset) { + if (fd == mockGtPciConfigFd) { + return -1; + } + return count; +} + +ssize_t mockGtConfigPwriteFail(int fd, const void *buf, size_t count, off_t offset) { + if (fd == mockGtPciConfigFd) { + return -1; + } return count; } @@ -519,6 +589,94 @@ TEST_F(ZesDiagnosticsFixture, GivenValidDiagnosticsHandleWhenInvalidateLmemFails } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetThenCallSucceeds) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = preadMockDiag; + 
pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + EXPECT_EQ(ZE_RESULT_SUCCESS, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndGtPciConfigOpenFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = gtPciConfigOpenFail; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = preadMockDiag; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndConfigHeaderIsInvalidThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = mockGtConfigPreadInvalid; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndGtConfigPreadFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = mockGtConfigPreadFail; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndGtConfigPwriteFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; 
+ pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = preadMockDiag; + pLinuxSysmanImp->pwriteFunction = mockGtConfigPwriteFail; + + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndGtConfigCloseFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = mockGtConfigcloseFail; + pLinuxSysmanImp->preadFunction = preadMockDiag; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + EXPECT_EQ(ZE_RESULT_ERROR_UNKNOWN, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndCardBusRemoveFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = preadMockDiag; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + pMockFsAccess->checkErrorAfterCount = 2; + pMockFsAccess->mockWriteError = ZE_RESULT_ERROR_NOT_AVAILABLE; + + EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetAndRootPortRescanFailsThenCallReturnsFailure) { + pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; + pLinuxSysmanImp->openFunction = openMockDiag; + pLinuxSysmanImp->closeFunction = closeMockDiag; + pLinuxSysmanImp->preadFunction = preadMockDiag; + pLinuxSysmanImp->pwriteFunction = pwriteMockDiag; + + pMockFsAccess->checkErrorAfterCount = 3; + 
pMockFsAccess->mockWriteError = ZE_RESULT_ERROR_NOT_AVAILABLE; + + EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, pLinuxSysmanImp->osWarmReset()); +} + +TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndVfBarIsResizedWhenCallingWarmResetThenCallSucceeds) { pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -529,6 +687,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetThen } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetfromDiagnosticsThenCallSucceeds) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -540,6 +700,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetfrom } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetfromHBMDiagnosticsThenCallSucceeds) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -554,6 +716,7 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetfrom TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndDelayForPPRWhenCallingWarmResetfromHBMDiagnosticsThenCallSucceeds) { DebugManagerStateRestore dbgRestore; DebugManager.flags.DebugSetMemoryDiagnosticsDelay.set(7); + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; 
pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -566,6 +729,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerAndDelayForPPRWhenCallin } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndRootPortConfigFileFailsToOpenThenCallFails) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiagFail; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -576,6 +741,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndR } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndRootPortConfigFileFailsToCloseThenCallFails) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiagFail; @@ -586,6 +753,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndR } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndCardbusRemoveFailsThenCallFails) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; @@ -597,6 +766,8 @@ TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndC } TEST_F(ZesDiagnosticsFixture, GivenValidSysmanImpPointerWhenCallingWarmResetAndRootPortRescanFailsThenCallFails) { + DebugManagerStateRestore dbgRestore; + 
DebugManager.flags.VfBarResourceAllocationWa.set(false); pLinuxSysmanImp->gtDevicePath = "/sys/devices/pci0000:89/0000:89:02.0/0000:8a:00.0/0000:8b:01.0/0000:8c:00.0"; pLinuxSysmanImp->openFunction = openMockDiag; pLinuxSysmanImp->closeFunction = closeMockDiag; diff --git a/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp b/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp index 779e423d97..aff011ca98 100644 --- a/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp +++ b/level_zero/tools/test/unit_tests/sources/sysman/global_operations/linux/test_zes_global_operations.cpp @@ -755,6 +755,8 @@ TEST_F(SysmanGlobalOperationsFixture, GivenGemCreateIoctlFailsWithEINVALWhenCall } TEST_F(SysmanGlobalOperationsFixture, GivenForceTrueWhenCallingResetThenSuccessIsReturned) { + DebugManagerStateRestore dbgRestore; + DebugManager.flags.VfBarResourceAllocationWa.set(false); initGlobalOps(); static_cast(pGlobalOperationsImp->pOsGlobalOperations)->pLinuxSysmanImp = pMockGlobalOpsLinuxSysmanImp.get(); static_cast(pGlobalOperationsImp->pOsGlobalOperations)->pLinuxSysmanImp->pDevice = pLinuxSysmanImp->getDeviceHandle(); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 40f2f9dffe..89ac1e37dc 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -548,3 +548,4 @@ DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produc /* WORKAROUND FLAGS */ DECLARE_DEBUG_VARIABLE(int32_t, ForceDummyBlitWa, -1, "-1: default, 0: disabled, 1: enabled, Forces a workaround with dummy blits, driver adds an extra blit before command MI_ARB_CHECK on bcs") +DECLARE_DEBUG_VARIABLE(bool, VfBarResourceAllocationWa, true, "Enables/disables WA for resizing VF BAR to 2GB on Warm 
Reset.") diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index f675122c65..f031159716 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -528,4 +528,5 @@ EnableCpuCacheForResources = 1 OverrideHwIpVersion = -1 PrintGlobalTimestampInNs = 0 EnableDeviceStateVerification = -1 +VfBarResourceAllocationWa = 1 # Please don't edit below this line