Add support for cl_ext_float_atomics

Resolves: NEO-6596

Signed-off-by: Kacper Kasper <kacper.k.kasper@intel.com>
This commit is contained in:
Kacper Kasper
2023-02-17 19:00:29 +00:00
committed by Compute-Runtime-Automation
parent 6c2f5df6c3
commit da22e0aac9
25 changed files with 232 additions and 45 deletions

View File

@@ -40,6 +40,9 @@ static constexpr cl_device_fp_config defaultFpFlags = static_cast<cl_device_fp_c
CL_FP_DENORM |
CL_FP_FMA);
// Baseline floating-point atomic capability mask: atomic load/store on both
// global and local memory. Used as the starting value for the single-precision
// mask and as the base value for the double-precision mask in initializeCaps().
static constexpr cl_device_fp_atomic_capabilities_ext defaultFpAtomicCapabilities = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT |
CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT);
void ClDevice::setupFp64Flags() {
auto &hwInfo = getHardwareInfo();
@@ -167,6 +170,27 @@ void ClDevice::initializeCaps() {
}
}
// cl_ext_float_atomics: when enabledClVersion is at least 20, advertise the
// extension and fill in the half/single/double FP atomic capability masks.
if (enabledClVersion >= 20) {
deviceExtensions += "cl_ext_float_atomics ";
// Single precision always starts from the default mask; half precision
// starts empty and is only populated by the branch below.
deviceInfo.singleFpAtomicCapabilities = defaultFpAtomicCapabilities;
deviceInfo.halfFpAtomicCapabilities = 0;
// With OCL 2.1 features enabled and supportsFloatAtomics set in the
// capability table: single precision gains ADD and MIN_MAX (global + local);
// half precision gets LOAD_STORE and MIN_MAX (global + local) but no ADD.
if (ocl21FeaturesEnabled && hwInfo.capabilityTable.supportsFloatAtomics) {
deviceInfo.singleFpAtomicCapabilities |= static_cast<cl_device_fp_atomic_capabilities_ext>(
CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
deviceInfo.halfFpAtomicCapabilities |= static_cast<cl_device_fp_atomic_capabilities_ext>(
CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
}
// Parses as (ftrSupportsInteger64BitAtomics || supportsFloatAtomics) ? default : 0,
// because `||` binds tighter than `?:`.
const cl_device_fp_atomic_capabilities_ext baseFP64AtomicCapabilities = hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics || hwInfo.capabilityTable.supportsFloatAtomics ? defaultFpAtomicCapabilities : 0;
// Optional FP64 ADD / MIN_MAX bits use the same condition as the
// single-precision extras above.
const cl_device_fp_atomic_capabilities_ext optionalFP64AtomicCapabilities = ocl21FeaturesEnabled && hwInfo.capabilityTable.supportsFloatAtomics ? static_cast<cl_device_fp_atomic_capabilities_ext>(
CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT)
: 0;
// Double-precision atomics are reported only when doubleFpConfig is non-zero;
// otherwise the mask is 0 regardless of the base/optional values above.
deviceInfo.doubleFpAtomicCapabilities = deviceInfo.doubleFpConfig != 0u ? baseFP64AtomicCapabilities | optionalFP64AtomicCapabilities : 0;
}
if (DebugManager.flags.EnableNV12.get() && hwInfo.capabilityTable.supportsImages) {
deviceExtensions += "cl_intel_planar_yuv ";
deviceInfo.nv12Extension = true;

View File

@@ -86,6 +86,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_COMPILER_AVAILABLE: getCap<CL_DEVICE_COMPILER_AVAILABLE >(src, srcSize, retSize); break;
case CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_DOUBLE_FP_CONFIG: getCap<CL_DEVICE_DOUBLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_DRIVER_VERSION_INTEL: getCap<CL_DEVICE_DRIVER_VERSION_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_ENDIAN_LITTLE: getCap<CL_DEVICE_ENDIAN_LITTLE >(src, srcSize, retSize); break;
@@ -98,6 +99,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_HALF_FP_CONFIG: getCap<CL_DEVICE_HALF_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap<CL_DEVICE_HOST_UNIFIED_MEMORY >(src, srcSize, retSize); break;
@@ -165,6 +167,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES >(src, srcSize, retSize); break;
case CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_FP_CONFIG: getCap<CL_DEVICE_SINGLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_SLICE_COUNT_INTEL: getCap<CL_DEVICE_SLICE_COUNT_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SPIR_VERSIONS: getStr<CL_DEVICE_SPIR_VERSIONS >(src, srcSize, retSize); break;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -76,6 +76,9 @@ struct ClDeviceInfo {
cl_bool independentForwardProgress;
cl_device_atomic_capabilities atomicMemoryCapabilities;
cl_device_atomic_capabilities atomicFenceCapabilities;
cl_device_fp_atomic_capabilities_ext singleFpAtomicCapabilities;
cl_device_fp_atomic_capabilities_ext halfFpAtomicCapabilities;
cl_device_fp_atomic_capabilities_ext doubleFpAtomicCapabilities;
cl_bool nonUniformWorkGroupSupport;
cl_bool workGroupCollectiveFunctionsSupport;
cl_bool genericAddressSpaceSupport;

View File

@@ -88,6 +88,7 @@ template<> struct Map<CL_DEVICE_COMPILER_AVAILABLE > :
template<> struct Map<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::crossDeviceSharedMemCapabilities> {};
template<> struct Map<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES > : public ClMapBase<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, uint64_t, &ClDeviceInfo::deviceEnqueueSupport> {};
template<> struct Map<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::deviceMemCapabilities> {};
// cl_ext_float_atomics: ties the double-precision FP atomic capabilities query to ClDeviceInfo::doubleFpAtomicCapabilities, typed as uint64_t.
template<> struct Map<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::doubleFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_DOUBLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_DOUBLE_FP_CONFIG, uint64_t, &ClDeviceInfo::doubleFpConfig> {};
template<> struct Map<CL_DEVICE_DRIVER_VERSION_INTEL > : public ClMapBase<CL_DEVICE_DRIVER_VERSION_INTEL, uint32_t, &ClDeviceInfo::internalDriverVersion> {};
template<> struct Map<CL_DEVICE_ENDIAN_LITTLE > : public ClMapBase<CL_DEVICE_ENDIAN_LITTLE, uint32_t, &ClDeviceInfo::endianLittle> {};
@@ -97,6 +98,7 @@ template<> struct Map<CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT > :
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, uint64_t, &ClDeviceInfo::globalMemCacheSize> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, uint32_t, &ClDeviceInfo::globalMemCacheType> {};
template<> struct Map<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_t, &ClDeviceInfo::globalVariablePreferredTotalSize> {};
// cl_ext_float_atomics: ties the half-precision FP atomic capabilities query to ClDeviceInfo::halfFpAtomicCapabilities, typed as uint64_t.
template<> struct Map<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::halfFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_HALF_FP_CONFIG > : public ClMapBase<CL_DEVICE_HALF_FP_CONFIG, uint64_t, &ClDeviceInfo::halfFpConfig> {};
template<> struct Map<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::hostMemCapabilities> {};
template<> struct Map<CL_DEVICE_HOST_UNIFIED_MEMORY > : public ClMapBase<CL_DEVICE_HOST_UNIFIED_MEMORY, uint32_t, &ClDeviceInfo::hostUnifiedMemory> {};
@@ -161,6 +163,7 @@ template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > :
template<> struct Map<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES > : public ClMapBase<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, uint64_t, &ClDeviceInfo::queueOnHostProperties> {};
template<> struct Map<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::sharedSystemMemCapabilities> {};
template<> struct Map<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::singleDeviceSharedMemCapabilities> {};
// cl_ext_float_atomics: ties the single-precision FP atomic capabilities query to ClDeviceInfo::singleFpAtomicCapabilities, typed as uint64_t.
template<> struct Map<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::singleFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_SINGLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_SINGLE_FP_CONFIG, uint64_t, &ClDeviceInfo::singleFpConfig> {};
template<> struct Map<CL_DEVICE_SLICE_COUNT_INTEL > : public ClMapBase<CL_DEVICE_SLICE_COUNT_INTEL, size_t, &ClDeviceInfo::maxSliceCount> {};
template<> struct Map<CL_DEVICE_SPIR_VERSIONS > : public ClMapBase<CL_DEVICE_SPIR_VERSIONS, const char *, &ClDeviceInfo::spirVersions> {};