diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp
index 5ed5def4df..7755d8ad5a 100644
--- a/level_zero/core/source/device/device_imp.cpp
+++ b/level_zero/core/source/device/device_imp.cpp
@@ -29,6 +29,7 @@
 #include "shared/source/helpers/topology_map.h"
 #include "shared/source/indirect_heap/indirect_heap.h"
 #include "shared/source/kernel/grf_config.h"
+#include "shared/source/kernel/kernel_properties.h" // provides the FpAtomicExtFlags constants used by the static_asserts below
 #include "shared/source/memory_manager/allocation_properties.h"
 #include "shared/source/memory_manager/allocations_list.h"
 #include "shared/source/memory_manager/memory_manager.h"
@@ -796,9 +797,16 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne
         if (extendedProperties->stype == ZE_STRUCTURE_TYPE_FLOAT_ATOMIC_EXT_PROPERTIES) {
             ze_float_atomic_ext_properties_t *floatProperties =
                 reinterpret_cast(extendedProperties);
-            productHelper.getKernelExtendedProperties(&floatProperties->fp16Flags,
-                                                      &floatProperties->fp32Flags,
-                                                      &floatProperties->fp64Flags);
+            productHelper.getKernelExtendedProperties(hardwareInfo, // the three flag fields are now passed by reference instead of by pointer
+                                                      floatProperties->fp16Flags,
+                                                      floatProperties->fp32Flags,
+                                                      floatProperties->fp64Flags);
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_LOAD_STORE == FpAtomicExtFlags::globalLoadStore, "Mismatch between internal and API - specific capabilities."); // compile-time checks: each ZE_DEVICE_FP_ATOMIC_EXT_FLAG_* must equal its FpAtomicExtFlags counterpart
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_ADD == FpAtomicExtFlags::globalAdd, "Mismatch between internal and API - specific capabilities.");
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_GLOBAL_MIN_MAX == FpAtomicExtFlags::globalMinMax, "Mismatch between internal and API - specific capabilities.");
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_LOAD_STORE == FpAtomicExtFlags::localLoadStore, "Mismatch between internal and API - specific capabilities.");
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_ADD == FpAtomicExtFlags::localAdd, "Mismatch between internal and API - specific capabilities.");
+            static_assert(ZE_DEVICE_FP_ATOMIC_EXT_FLAG_LOCAL_MIN_MAX == FpAtomicExtFlags::localMinMax, "Mismatch between internal and API - specific capabilities.");
         } else if (extendedProperties->stype == ZE_STRUCTURE_TYPE_SCHEDULING_HINT_EXP_PROPERTIES) {
             ze_scheduling_hint_exp_properties_t *hintProperties =
                 reinterpret_cast(extendedProperties);
diff --git a/shared/source/kernel/kernel_properties.h b/shared/source/kernel/kernel_properties.h
index c7bc42ad31..1e05fd943b 100644
--- a/shared/source/kernel/kernel_properties.h
+++ b/shared/source/kernel/kernel_properties.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2021-2023 Intel Corporation
+ * Copyright (C) 2021-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
@@ -15,4 +15,8 @@ inline constexpr uint64_t globalMinMax = 1 << 2; // Supports atomic min and m
 inline constexpr uint64_t localLoadStore = 1 << 16; // Supports atomic load, store, and exchange
 inline constexpr uint64_t localAdd = 1 << 17; // Supports atomic add and subtract
 inline constexpr uint64_t localMinMax = 1 << 18; // Supports atomic min and max
+
+inline constexpr uint32_t loadStoreAtomicCaps = (0u | FpAtomicExtFlags::globalLoadStore | FpAtomicExtFlags::localLoadStore); // globalLoadStore and localLoadStore bits combined; note the mask is uint32_t while the single flags are uint64_t
+inline constexpr uint32_t minMaxAtomicCaps = (0u | FpAtomicExtFlags::globalMinMax | FpAtomicExtFlags::localMinMax); // globalMinMax and localMinMax bits combined
+inline constexpr uint32_t addAtomicCaps = (0u | FpAtomicExtFlags::globalAdd | FpAtomicExtFlags::localAdd); // globalAdd and localAdd bits combined
 } // namespace FpAtomicExtFlags
diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h
index 617d12e21b..09e553885c 100644
--- a/shared/source/os_interface/product_helper.h
+++ b/shared/source/os_interface/product_helper.h
@@ -80,7 +80,10 @@ class ProductHelper {
     virtual uint64_t getSingleDeviceSharedMemCapabilities() const = 0;
     virtual uint64_t getCrossDeviceSharedMemCapabilities() const = 0;
     virtual uint64_t getSharedSystemMemCapabilities(const HardwareInfo *hwInfo) const = 0;
-    virtual void getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) const = 0;
+    virtual void getKernelFp16AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp16) const = 0; // per-precision getter; fp16 is an out-parameter
+    virtual void getKernelFp32AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp32) const = 0; // per-precision getter; fp32 is an out-parameter
+    virtual void getKernelFp64AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp64) const = 0; // per-precision getter; fp64 is an out-parameter
+    virtual void getKernelExtendedProperties(const HardwareInfo &hwInfo, uint32_t &fp16, uint32_t &fp32, uint32_t &fp64) const = 0; // replaces the pointer-based overload removed above
     virtual std::vector getKernelSupportedThreadArbitrationPolicies() const = 0;
     virtual uint32_t getDeviceMemoryMaxClkRate(const HardwareInfo &hwInfo, const OSInterface *osIface, uint32_t subDeviceIndex) const = 0;
     virtual uint64_t getDeviceMemoryPhysicalSizeInBytes(const OSInterface *osIface, uint32_t subDeviceIndex) const = 0;
diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl
index f24430e546..9a17bb8fbd 100644
--- a/shared/source/os_interface/product_helper.inl
+++ b/shared/source/os_interface/product_helper.inl
@@ -38,10 +38,25 @@ int ProductHelperHw::configureHardwareCustom(HardwareInfo *hwInfo, O
 }
 
 template
-void ProductHelperHw::getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) const {
-    *fp16 = (0u | FpAtomicExtFlags::globalMinMax | FpAtomicExtFlags::localMinMax | FpAtomicExtFlags::globalLoadStore | FpAtomicExtFlags::localLoadStore);
-    *fp32 = (0u | FpAtomicExtFlags::globalMinMax | FpAtomicExtFlags::localMinMax | FpAtomicExtFlags::globalAdd | FpAtomicExtFlags::localAdd | FpAtomicExtFlags::globalLoadStore | FpAtomicExtFlags::localLoadStore);
-    *fp64 = (0u | FpAtomicExtFlags::globalMinMax | FpAtomicExtFlags::localMinMax | FpAtomicExtFlags::globalAdd | FpAtomicExtFlags::localAdd | FpAtomicExtFlags::globalLoadStore | FpAtomicExtFlags::localLoadStore);
+void ProductHelperHw::getKernelFp16AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp16) const { // hwInfo is not read in this generic implementation
+    fp16 = (0u | FpAtomicExtFlags::minMaxAtomicCaps | FpAtomicExtFlags::loadStoreAtomicCaps); // fp16: min/max and load/store, no add - same bits as the removed *fp16 line
+}
+
+template
+void ProductHelperHw::getKernelFp32AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp32) const { // hwInfo is not read in this generic implementation
+    fp32 = (0u | FpAtomicExtFlags::minMaxAtomicCaps | FpAtomicExtFlags::loadStoreAtomicCaps | FpAtomicExtFlags::addAtomicCaps); // fp32: min/max, load/store and add
+}
+
+template
+void ProductHelperHw::getKernelFp64AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp64) const { // hwInfo is not read in this generic implementation
+    fp64 = (0u | FpAtomicExtFlags::minMaxAtomicCaps | FpAtomicExtFlags::loadStoreAtomicCaps | FpAtomicExtFlags::addAtomicCaps); // fp64: identical mask to fp32
+}
+
+template
+void ProductHelperHw::getKernelExtendedProperties(const HardwareInfo &hwInfo, uint32_t &fp16, uint32_t &fp32, uint32_t &fp64) const { // aggregate entry point kept for existing callers
+    getKernelFp16AtomicCapabilities(hwInfo, fp16); // each output is filled by its per-precision getter
+    getKernelFp32AtomicCapabilities(hwInfo, fp32);
+    getKernelFp64AtomicCapabilities(hwInfo, fp64);
 }
 
 template
diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h
index 8d38599199..4e76df7b05 100644
--- a/shared/source/os_interface/product_helper_hw.h
+++ b/shared/source/os_interface/product_helper_hw.h
@@ -27,7 +27,10 @@ class ProductHelperHw : public ProductHelper {
     uint64_t getSingleDeviceSharedMemCapabilities() const override;
     uint64_t getCrossDeviceSharedMemCapabilities() const override;
     uint64_t getSharedSystemMemCapabilities(const HardwareInfo *hwInfo) const override;
-    void getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) const override;
+    void getKernelFp16AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp16) const override; // overrides matching the new ProductHelper pure virtuals
+    void getKernelFp32AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp32) const override;
+    void getKernelFp64AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp64) const override;
+    void getKernelExtendedProperties(const HardwareInfo &hwInfo, uint32_t &fp16, uint32_t &fp32, uint32_t &fp64) const override;
     std::vector getKernelSupportedThreadArbitrationPolicies() const override;
     uint32_t getDeviceMemoryMaxClkRate(const HardwareInfo &hwInfo, const OSInterface *osIface, uint32_t subDeviceIndex) const override;
     uint64_t getDeviceMemoryPhysicalSizeInBytes(const OSInterface *osIface, uint32_t subDeviceIndex) const override;
diff --git a/shared/test/common/mocks/mock_product_helper.cpp b/shared/test/common/mocks/mock_product_helper.cpp
index d5e0eaab2c..d2831dd8e7 100644
--- a/shared/test/common/mocks/mock_product_helper.cpp
+++ b/shared/test/common/mocks/mock_product_helper.cpp
@@ -65,7 +65,19 @@ uint64_t ProductHelperHw::getSharedSystemMemCapabilities(const Har
 }
 
 template <>
-void ProductHelperHw::getKernelExtendedProperties(uint32_t *fp16, uint32_t *fp32, uint32_t *fp64) const {
+void ProductHelperHw::getKernelFp16AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp16) const { // mock specialization: empty body, fp16 is left unmodified
+}
+
+template <>
+void ProductHelperHw::getKernelFp32AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp32) const { // mock specialization: empty body, fp32 is left unmodified
+}
+
+template <>
+void ProductHelperHw::getKernelFp64AtomicCapabilities(const HardwareInfo &hwInfo, uint32_t &fp64) const { // mock specialization: empty body, fp64 is left unmodified
+}
+
+template <>
+void ProductHelperHw::getKernelExtendedProperties(const HardwareInfo &hwInfo, uint32_t &fp16, uint32_t &fp32, uint32_t &fp64) const { // mock specialization: empty body, does not call the per-precision getters
 }
 
 template <>