From e768d0ed5ee70677840a7eadf3cea1bc7a31e776 Mon Sep 17 00:00:00 2001 From: "Milczarek, Slawomir" Date: Fri, 1 Jul 2022 15:06:12 +0000 Subject: [PATCH] Add regkey to enable support for concurrent access in usm capabilities The new regkey is EnableUsmConcurrentAccessSupport that takes a bitmask with usm capabilities to enable concurrent access for (bit0: host, bit1: device, bit2: shared single-device, bit3: shared cross-device, bit4: shared system) Related-To: NEO-6733 Signed-off-by: Milczarek, Slawomir --- .../xe_hpc_core/pvc/get_device_info_pvc.cpp | 20 ++++++++ .../debug_settings/debug_variables_base.inl | 1 + shared/source/os_interface/hw_info_config.h | 9 ++++ shared/source/os_interface/hw_info_config.inl | 38 ++++++++++++-- .../hw_info_config_xehp_and_later.inl | 8 ++- shared/test/common/test_files/igdrcl.config | 1 + .../os_interface/hw_info_config_tests.cpp | 51 +++++++++++++++++++ 7 files changed, 124 insertions(+), 4 deletions(-) diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/get_device_info_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/get_device_info_pvc.cpp index 19be07771e..56680b19cf 100644 --- a/opencl/test/unit_test/xe_hpc_core/pvc/get_device_info_pvc.cpp +++ b/opencl/test/unit_test/xe_hpc_core/pvc/get_device_info_pvc.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/hw_test.h" #include "opencl/test/unit_test/fixtures/device_info_fixture.h" @@ -12,6 +13,7 @@ using namespace NEO; HWTEST_EXCLUDE_PRODUCT(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForXE_HP_COREThenClSuccessIsReturned, IGFX_XE_HPC_CORE); +HWTEST_EXCLUDE_PRODUCT(GetDeviceInfoMemCapabilitiesTest, GivenEnableUsmConcurrentAccessSupportWhenGetDeviceInfoIsCalledForXE_HP_COREThenClSuccessIsReturned, IGFX_XE_HPC_CORE); PVCTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInfoIsCalledForPVCThenClSuccessIsReturned) { std::vector params = { @@ -27,3 
+29,21 @@ PVCTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenValidParametersWhenGetDeviceInf check(params); } + +PVCTEST_F(GetDeviceInfoMemCapabilitiesTest, GivenEnableUsmConcurrentAccessSupportWhenGetDeviceInfoIsCalledForPVCThenClSuccessIsReturned) { + DebugManagerStateRestore restorer; + DebugManager.flags.EnableUsmConcurrentAccessSupport.set(0b1110); + + std::vector params = { + {CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL}, + {CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, + (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL)}, + {CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL)}, + {CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, + (CL_UNIFIED_SHARED_MEMORY_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS_INTEL | CL_UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS_INTEL)}, + {CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, + 0}}; + + check(params); +} diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 8729ae1007..ac15be58a1 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -368,6 +368,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, PowerSavingMode, 0, "0: default 1: enable. 
Whene DECLARE_DEBUG_VARIABLE(int32_t, CsrDispatchMode, 0, "Chooses DispatchMode for Csr") DECLARE_DEBUG_VARIABLE(int32_t, RenderCompressedImagesEnabled, -1, "-1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, RenderCompressedBuffersEnabled, -1, "-1: default, 0: disabled, 1: enabled") +DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmConcurrentAccessSupport, 0, "0: default, >0: bitmask with usm capabilities to enable concurrent access for (bit0: host, bit1: device, bit2: shared single-device, bit3: shared cross-device, bit4: shared system)") DECLARE_DEBUG_VARIABLE(int32_t, EnableSharedSystemUsmSupport, -1, "-1: default, 0: shared system memory disabled, 1: shared system memory enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnablePassInlineData, -1, "-1: default, 0: Do not allow to pass inline data 1: Enable passing of inline data") DECLARE_DEBUG_VARIABLE(int32_t, ForceFineGrainedSVMSupport, -1, "-1: default, 0: Do not report Fine Grained SVM capabilities 1: Report SVM Fine Grained capabilities if device supports SVM") diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index 77a1760046..6e15933d9e 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -31,6 +31,14 @@ enum class DriverModelType; extern HwInfoConfig *hwInfoConfigFactory[IGFX_MAX_PRODUCT]; +enum class UsmAccessCapabilities { + Host = 0, + Device, + SharedSingleDevice, + SharedCrossDevice, + SharedSystemCrossDevice +}; + class HwInfoConfig { public: static HwInfoConfig *get(PRODUCT_FAMILY product) { @@ -230,6 +238,7 @@ class HwInfoConfigHw : public HwInfoConfig { void enableCompression(HardwareInfo *hwInfo); void enableBlitterOperationsSupport(HardwareInfo *hwInfo); + bool getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities capability); uint64_t getHostMemCapabilitiesValue(); bool getHostMemCapabilitiesSupported(const HardwareInfo *hwInfo); LocalMemoryAccessMode 
getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index e140718ad8..f29166a28b 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -16,6 +16,8 @@ #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/hw_info_config.h" +#include <bitset> + namespace NEO { template @@ -59,12 +61,24 @@ void HwInfoConfigHw::enableBlitterOperationsSupport(HardwareInfo *hw template uint64_t HwInfoConfigHw::getDeviceMemCapabilities() { - return (UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS); + uint64_t capabilities = UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS; + + if (getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities::Device)) { + capabilities |= UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS; + } + + return capabilities; } template uint64_t HwInfoConfigHw::getSingleDeviceSharedMemCapabilities() { - return (UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS); + uint64_t capabilities = UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS; + + if (getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities::SharedSingleDevice)) { + capabilities |= UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS; + } + + return capabilities; } template @@ -80,7 +94,13 @@ uint64_t HwInfoConfigHw::getHostMemCapabilities(const HardwareInfo * supported = !!DebugManager.flags.EnableHostUsmSupport.get(); } - return (supported ? 
getHostMemCapabilitiesValue() : 0); + uint64_t capabilities = getHostMemCapabilitiesValue(); + + if (getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities::Host)) { + capabilities |= UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS; + } + + return (supported ? capabilities : 0); } template @@ -94,6 +114,18 @@ uint64_t HwInfoConfigHw::getSharedSystemMemCapabilities(const Hardwa return (supported ? (UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS) : 0); } +template +bool HwInfoConfigHw::getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities capability) { + auto supported = false; + + if (DebugManager.flags.EnableUsmConcurrentAccessSupport.get() > 0) { + auto capabilityBitset = std::bitset<32>(DebugManager.flags.EnableUsmConcurrentAccessSupport.get()); + supported = capabilityBitset.test(static_cast(capability)); + } + + return supported; +} + template uint32_t HwInfoConfigHw::getDeviceMemoryMaxClkRate(const HardwareInfo &hwInfo, const OSInterface *osIface, uint32_t subDeviceIndex) { return 0u; diff --git a/shared/source/os_interface/hw_info_config_xehp_and_later.inl b/shared/source/os_interface/hw_info_config_xehp_and_later.inl index 34e970aede..8762429cbc 100644 --- a/shared/source/os_interface/hw_info_config_xehp_and_later.inl +++ b/shared/source/os_interface/hw_info_config_xehp_and_later.inl @@ -15,7 +15,13 @@ uint64_t HwInfoConfigHw::getHostMemCapabilitiesValue() { template uint64_t HwInfoConfigHw::getCrossDeviceSharedMemCapabilities() { - return (UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS); + uint64_t capabilities = UNIFIED_SHARED_MEMORY_ACCESS | UNIFIED_SHARED_MEMORY_ATOMIC_ACCESS; + + if (getConcurrentAccessMemCapabilitiesSupported(UsmAccessCapabilities::SharedCrossDevice)) { + capabilities |= UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS | 
UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS; + } + + return capabilities; } template diff --git a/shared/test/common/test_files/igdrcl.config b/shared/test/common/test_files/igdrcl.config index cf6f1113df..fb4c1291fd 100644 --- a/shared/test/common/test_files/igdrcl.config +++ b/shared/test/common/test_files/igdrcl.config @@ -165,6 +165,7 @@ CsrDispatchMode = 0 OverrideDefaultFP64Settings = -1 RenderCompressedImagesEnabled = -1 RenderCompressedBuffersEnabled = -1 +EnableUsmConcurrentAccessSupport = 0 EnableSharedSystemUsmSupport = -1 EnablePassInlineData = -1 ForceFineGrainedSVMSupport = -1 diff --git a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp index e53226f9cf..4b3b63db99 100644 --- a/shared/test/unit_test/os_interface/hw_info_config_tests.cpp +++ b/shared/test/unit_test/os_interface/hw_info_config_tests.cpp @@ -52,6 +52,57 @@ HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenGettingSharedSystemMemCapabiliti } } +HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenGettingMemoryCapabilitiesThenCorrectValueIsReturned) { + DebugManagerStateRestore restore; + + auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); + + for (auto capabilityBitmask : {0, 0b0001, 0b0010, 0b0100, 0b1000, 0b1111, 0b10000}) { + DebugManager.flags.EnableUsmConcurrentAccessSupport.set(capabilityBitmask); + std::bitset<32> capabilityBitset(capabilityBitmask); + + auto hostMemCapabilities = hwInfoConfig->getHostMemCapabilities(&pInHwInfo); + if (hostMemCapabilities > 0) { + if (capabilityBitset.test(static_cast(UsmAccessCapabilities::Host))) { + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS & hostMemCapabilities); + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS & hostMemCapabilities); + } + } + + auto deviceMemCapabilities = hwInfoConfig->getDeviceMemCapabilities(); + if (deviceMemCapabilities > 0) { + if (capabilityBitset.test(static_cast(UsmAccessCapabilities::Device))) { + 
EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS & deviceMemCapabilities); + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS & deviceMemCapabilities); + } + } + + auto singleDeviceSharedMemCapabilities = hwInfoConfig->getSingleDeviceSharedMemCapabilities(); + if (singleDeviceSharedMemCapabilities > 0) { + if (capabilityBitset.test(static_cast(UsmAccessCapabilities::SharedSingleDevice))) { + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS & singleDeviceSharedMemCapabilities); + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS & singleDeviceSharedMemCapabilities); + } + } + + auto crossDeviceSharedMemCapabilities = hwInfoConfig->getCrossDeviceSharedMemCapabilities(); + if (crossDeviceSharedMemCapabilities > 0) { + if (capabilityBitset.test(static_cast(UsmAccessCapabilities::SharedCrossDevice))) { + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS & crossDeviceSharedMemCapabilities); + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS & crossDeviceSharedMemCapabilities); + } + } + + auto sharedSystemMemCapabilities = hwInfoConfig->getSharedSystemMemCapabilities(&pInHwInfo); + if (sharedSystemMemCapabilities > 0) { + if (capabilityBitset.test(static_cast(UsmAccessCapabilities::SharedSystemCrossDevice))) { + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ACCESS & sharedSystemMemCapabilities); + EXPECT_TRUE(UNIFIED_SHARED_MEMORY_CONCURRENT_ATOMIC_ACCESS & sharedSystemMemCapabilities); + } + } + } +} + TEST_F(HwInfoConfigTest, WhenParsingHwInfoConfigThenCorrectValuesAreReturned) { uint64_t hwInfoConfig = 0x0;