Add support for cl_ext_float_atomics

Resolves: NEO-6596

Signed-off-by: Kacper Kasper <kacper.k.kasper@intel.com>
This commit is contained in:
Kacper Kasper 2023-02-17 19:00:29 +00:00 committed by Compute-Runtime-Automation
parent 674691d170
commit aece8195eb
25 changed files with 229 additions and 42 deletions

View File

@ -341,4 +341,32 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel;
// cl_intel_variable_eu_thread_count
#define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
/*************************************************
* cl_ext_float_atomics extension *
*************************************************/
/* The whole section is skipped when cl_ext_float_atomics is already defined,
 * so a definition that was seen earlier is never repeated. */
#if !defined(cl_ext_float_atomics)
#define cl_ext_float_atomics 1
/* Device-info query tokens, one per floating-point width
 * (single, double and half precision). */
#define CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT 0x4231
#define CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT 0x4232
#define CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT 0x4233
/* Bitfield type that carries the capability bits defined below. */
typedef cl_bitfield cl_device_fp_atomic_capabilities_ext;
/* GLOBAL_* capability bits occupy bits 0-2: load/store, add, min/max. */
#define CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT (1 << 0)
#define CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT (1 << 1)
#define CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT (1 << 2)
/* bits 3 - 15 are currently unused */
/* LOCAL_* capability bits repeat the same three operations at bits 16-18. */
#define CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT (1 << 16)
#define CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT (1 << 17)
#define CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT (1 << 18)
/* bits 19 and beyond are currently unused */
#endif

View File

@ -40,6 +40,9 @@ static constexpr cl_device_fp_config defaultFpFlags = static_cast<cl_device_fp_c
CL_FP_DENORM |
CL_FP_FMA);
// Baseline floating-point atomic capability mask: only the load/store bits,
// in both their GLOBAL and LOCAL variants. ClDevice::initializeCaps uses it as
// the starting value for the single-precision capabilities and as the base
// value for the double-precision ones.
static constexpr cl_device_fp_atomic_capabilities_ext defaultFpAtomicCapabilities = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT |
CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT);
void ClDevice::setupFp64Flags() {
auto &hwInfo = getHardwareInfo();
@ -167,6 +170,27 @@ void ClDevice::initializeCaps() {
}
}
if (enabledClVersion >= 20) {
    // cl_ext_float_atomics is advertised whenever enabledClVersion is 20 or higher.
    deviceExtensions += "cl_ext_float_atomics ";

    const bool floatAtomicsInHw = hwInfo.capabilityTable.supportsFloatAtomics;
    // add and min/max are exposed only when OCL 2.1 features are enabled
    // and the hardware reports float-atomics support.
    const bool extendedFpAtomicsEnabled = ocl21FeaturesEnabled && floatAtomicsInHw;
    // Double-precision capabilities are reported only when doubleFpConfig is non-zero.
    const bool fp64Enabled = deviceInfo.doubleFpConfig != 0u;

    // add + min/max, global and local: shared by the fp32 and fp64 masks.
    const auto addAndMinMaxCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(
        CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
        CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
    // load/store + min/max, global and local: the fp16 mask (no add bits).
    const auto halfPrecisionCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(
        CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
        CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);

    // Start from the baseline values, then widen them below.
    deviceInfo.singleFpAtomicCapabilities = defaultFpAtomicCapabilities;
    deviceInfo.halfFpAtomicCapabilities = 0;
    cl_device_fp_atomic_capabilities_ext fp64AtomicCaps = 0;

    // fp64 load/store needs either 64-bit integer atomics or float atomics in hardware.
    if (fp64Enabled && (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics || floatAtomicsInHw)) {
        fp64AtomicCaps |= defaultFpAtomicCapabilities;
    }

    if (extendedFpAtomicsEnabled) {
        deviceInfo.singleFpAtomicCapabilities |= addAndMinMaxCaps;
        deviceInfo.halfFpAtomicCapabilities |= halfPrecisionCaps;
        if (fp64Enabled) {
            fp64AtomicCaps |= addAndMinMaxCaps;
        }
    }

    deviceInfo.doubleFpAtomicCapabilities = fp64AtomicCaps;
}
if (DebugManager.flags.EnableNV12.get() && hwInfo.capabilityTable.supportsImages) {
deviceExtensions += "cl_intel_planar_yuv ";
deviceInfo.nv12Extension = true;

View File

@ -87,6 +87,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_COMPILER_AVAILABLE: getCap<CL_DEVICE_COMPILER_AVAILABLE >(src, srcSize, retSize); break;
case CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_DOUBLE_FP_CONFIG: getCap<CL_DEVICE_DOUBLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_DRIVER_VERSION_INTEL: getCap<CL_DEVICE_DRIVER_VERSION_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_ENDIAN_LITTLE: getCap<CL_DEVICE_ENDIAN_LITTLE >(src, srcSize, retSize); break;
@ -99,6 +100,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_MEM_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE >(src, srcSize, retSize); break;
case CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_HALF_FP_CONFIG: getCap<CL_DEVICE_HALF_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap<CL_DEVICE_HOST_UNIFIED_MEMORY >(src, srcSize, retSize); break;
@ -166,6 +168,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES >(src, srcSize, retSize); break;
case CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
case CL_DEVICE_SINGLE_FP_CONFIG: getCap<CL_DEVICE_SINGLE_FP_CONFIG >(src, srcSize, retSize); break;
case CL_DEVICE_SLICE_COUNT_INTEL: getCap<CL_DEVICE_SLICE_COUNT_INTEL >(src, srcSize, retSize); break;
case CL_DEVICE_SPIR_VERSIONS: getStr<CL_DEVICE_SPIR_VERSIONS >(src, srcSize, retSize); break;

View File

@ -76,6 +76,9 @@ struct ClDeviceInfo {
cl_bool independentForwardProgress;
cl_device_atomic_capabilities atomicMemoryCapabilities;
cl_device_atomic_capabilities atomicFenceCapabilities;
cl_device_fp_atomic_capabilities_ext singleFpAtomicCapabilities;
cl_device_fp_atomic_capabilities_ext halfFpAtomicCapabilities;
cl_device_fp_atomic_capabilities_ext doubleFpAtomicCapabilities;
cl_bool nonUniformWorkGroupSupport;
cl_bool workGroupCollectiveFunctionsSupport;
cl_bool genericAddressSpaceSupport;

View File

@ -88,6 +88,7 @@ template<> struct Map<CL_DEVICE_COMPILER_AVAILABLE > :
template<> struct Map<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::crossDeviceSharedMemCapabilities> {};
template<> struct Map<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES > : public ClMapBase<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, uint64_t, &ClDeviceInfo::deviceEnqueueSupport> {};
template<> struct Map<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::deviceMemCapabilities> {};
template<> struct Map<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::doubleFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_DOUBLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_DOUBLE_FP_CONFIG, uint64_t, &ClDeviceInfo::doubleFpConfig> {};
template<> struct Map<CL_DEVICE_DRIVER_VERSION_INTEL > : public ClMapBase<CL_DEVICE_DRIVER_VERSION_INTEL, uint32_t, &ClDeviceInfo::internalDriverVersion> {};
template<> struct Map<CL_DEVICE_ENDIAN_LITTLE > : public ClMapBase<CL_DEVICE_ENDIAN_LITTLE, uint32_t, &ClDeviceInfo::endianLittle> {};
@ -97,6 +98,7 @@ template<> struct Map<CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT > :
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, uint64_t, &ClDeviceInfo::globalMemCacheSize> {};
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, uint32_t, &ClDeviceInfo::globalMemCacheType> {};
template<> struct Map<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_t, &ClDeviceInfo::globalVariablePreferredTotalSize> {};
template<> struct Map<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::halfFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_HALF_FP_CONFIG > : public ClMapBase<CL_DEVICE_HALF_FP_CONFIG, uint64_t, &ClDeviceInfo::halfFpConfig> {};
template<> struct Map<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::hostMemCapabilities> {};
template<> struct Map<CL_DEVICE_HOST_UNIFIED_MEMORY > : public ClMapBase<CL_DEVICE_HOST_UNIFIED_MEMORY, uint32_t, &ClDeviceInfo::hostUnifiedMemory> {};
@ -161,6 +163,7 @@ template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > :
template<> struct Map<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES > : public ClMapBase<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, uint64_t, &ClDeviceInfo::queueOnHostProperties> {};
template<> struct Map<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::sharedSystemMemCapabilities> {};
template<> struct Map<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::singleDeviceSharedMemCapabilities> {};
template<> struct Map<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::singleFpAtomicCapabilities> {};
template<> struct Map<CL_DEVICE_SINGLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_SINGLE_FP_CONFIG, uint64_t, &ClDeviceInfo::singleFpConfig> {};
template<> struct Map<CL_DEVICE_SLICE_COUNT_INTEL > : public ClMapBase<CL_DEVICE_SLICE_COUNT_INTEL, size_t, &ClDeviceInfo::maxSliceCount> {};
template<> struct Map<CL_DEVICE_SPIR_VERSIONS > : public ClMapBase<CL_DEVICE_SPIR_VERSIONS, const char *, &ClDeviceInfo::spirVersions> {};

View File

@ -92,6 +92,16 @@ struct DeviceGetCapsTest : public ::testing::Test {
EXPECT_STREQ("__opencl_c_program_scope_global_variables", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_work_group_collective_functions", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_subgroups", (++openclCFeatureIterator)->name);
if (hwInfo.capabilityTable.supportsFloatAtomics) {
EXPECT_STREQ("__opencl_c_ext_fp32_global_atomic_add", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp32_local_atomic_add", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp32_global_atomic_min_max", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp32_local_atomic_min_max", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp16_global_atomic_load_store", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp16_local_atomic_load_store", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp16_global_atomic_min_max", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp16_local_atomic_min_max", (++openclCFeatureIterator)->name);
}
}
if (hwInfo.capabilityTable.supportsDeviceEnqueue) {
EXPECT_STREQ("__opencl_c_device_enqueue", (++openclCFeatureIterator)->name);
@ -101,6 +111,12 @@ struct DeviceGetCapsTest : public ::testing::Test {
}
if (hwInfo.capabilityTable.ftrSupportsFP64) {
EXPECT_STREQ("__opencl_c_fp64", (++openclCFeatureIterator)->name);
if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
EXPECT_STREQ("__opencl_c_ext_fp64_global_atomic_add", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp64_local_atomic_add", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp64_global_atomic_min_max", (++openclCFeatureIterator)->name);
EXPECT_STREQ("__opencl_c_ext_fp64_local_atomic_min_max", (++openclCFeatureIterator)->name);
}
}
EXPECT_EQ(clDevice.getDeviceInfo().openclCFeatures.end(), ++openclCFeatureIterator);
@ -738,6 +754,24 @@ TEST_F(DeviceGetCapsTest, WhenCheckingFp64ThenResultIsConsistentWithHardwareCapa
}
}
// With ForceOCLVersion set to 20, a newly created device must list
// cl_ext_float_atomics in its extension string.
TEST_F(DeviceGetCapsTest, givenOpenCLVersion20WhenCapsAreCreatedThenFloatAtomicsExtensionIsReported) {
    // Declared before the flag is touched; presumably restores debug flags on scope exit.
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.ForceOCLVersion.set(20);

    auto clDevice = std::make_unique<MockClDevice>(
        MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    const bool extensionReported = hasSubstr(clDevice->getDeviceInfo().deviceExtensions,
                                             std::string("cl_ext_float_atomics"));
    EXPECT_TRUE(extensionReported);
}
// With ForceOCLVersion set to 12, a newly created device must not list
// cl_ext_float_atomics in its extension string.
TEST_F(DeviceGetCapsTest, givenOpenCLVersion12WhenCapsAreCreatedThenDeviceDoesntReportFloatAtomicsExtension) {
    // Declared before the flag is touched; presumably restores debug flags on scope exit.
    DebugManagerStateRestore flagsRestorer;
    DebugManager.flags.ForceOCLVersion.set(12);

    auto clDevice = std::make_unique<MockClDevice>(
        MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get()));

    const bool extensionReported = hasSubstr(clDevice->getDeviceInfo().deviceExtensions,
                                             std::string("cl_ext_float_atomics"));
    EXPECT_FALSE(extensionReported);
}
TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportAdvancedVmeExtensionAndBuiltins) {
DebugManagerStateRestore dbgRestorer;
DebugManager.flags.EnableIntelAdvancedVme.set(1);
@ -972,38 +1006,68 @@ TEST_F(DeviceGetCapsTest, givenFp64SupportForcedWhenCheckingFp64SupportThenFp64I
auto hwInfo = *defaultHwInfo;
for (auto isFp64SupportedByHw : ::testing::Bool()) {
hwInfo.capabilityTable.ftrSupportsFP64 = isFp64SupportedByHw;
hwInfo.capabilityTable.ftrSupports64BitMath = isFp64SupportedByHw;
for (auto isInteger64BitAtomicsSupportedByHw : ::testing::Bool()) {
for (auto isFloatAtomicsSupportedByHw : ::testing::Bool()) {
hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics = isInteger64BitAtomicsSupportedByHw;
hwInfo.capabilityTable.ftrSupportsFP64 = isFp64SupportedByHw;
hwInfo.capabilityTable.ftrSupports64BitMath = isFp64SupportedByHw;
hwInfo.capabilityTable.supportsFloatAtomics = isFloatAtomicsSupportedByHw;
for (auto overrideDefaultFP64Settings : overrideDefaultFP64SettingsValues) {
DebugManager.flags.OverrideDefaultFP64Settings.set(overrideDefaultFP64Settings);
auto pClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto &caps = pClDevice->getDeviceInfo();
std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions;
for (auto overrideDefaultFP64Settings : overrideDefaultFP64SettingsValues) {
DebugManager.flags.OverrideDefaultFP64Settings.set(overrideDefaultFP64Settings);
auto pClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
auto &caps = pClDevice->getDeviceInfo();
std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions;
size_t fp64FeaturesCount = 0;
for (auto &openclCFeature : caps.openclCFeatures) {
if (0 == strcmp(openclCFeature.name, "__opencl_c_fp64")) {
fp64FeaturesCount++;
size_t fp64FeaturesCount = 0;
for (auto &openclCFeature : caps.openclCFeatures) {
if (0 == strcmp(openclCFeature.name, "__opencl_c_fp64")) {
fp64FeaturesCount++;
}
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_global_atomic_add")) {
fp64FeaturesCount++;
}
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_local_atomic_add")) {
fp64FeaturesCount++;
}
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_global_atomic_min_max")) {
fp64FeaturesCount++;
}
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_local_atomic_min_max")) {
fp64FeaturesCount++;
}
}
bool expectedFp64Support = ((overrideDefaultFP64Settings == -1) ? isFp64SupportedByHw : overrideDefaultFP64Settings);
if (expectedFp64Support) {
const size_t expectedFp64FeaturesCount = hwInfo.capabilityTable.supportsOcl21Features && isFloatAtomicsSupportedByHw ? 5u : 1u;
EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
EXPECT_NE(0u, caps.doubleFpConfig);
if (hwInfo.capabilityTable.supportsOcl21Features && isFloatAtomicsSupportedByHw) {
const cl_device_fp_atomic_capabilities_ext expectedFpCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
EXPECT_EQ(expectedFpCaps, caps.doubleFpAtomicCapabilities);
} else if (isFloatAtomicsSupportedByHw || isInteger64BitAtomicsSupportedByHw) {
const cl_device_fp_atomic_capabilities_ext expectedFpCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT);
EXPECT_EQ(expectedFpCaps, caps.doubleFpAtomicCapabilities);
} else {
EXPECT_EQ(0u, caps.doubleFpAtomicCapabilities);
}
EXPECT_EQ(expectedFp64FeaturesCount, fp64FeaturesCount);
EXPECT_NE(0u, caps.nativeVectorWidthDouble);
EXPECT_NE(0u, caps.preferredVectorWidthDouble);
EXPECT_TRUE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
} else {
EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
EXPECT_EQ(0u, caps.doubleFpConfig);
EXPECT_EQ(0u, caps.doubleFpAtomicCapabilities);
EXPECT_EQ(0u, fp64FeaturesCount);
EXPECT_EQ(0u, caps.nativeVectorWidthDouble);
EXPECT_EQ(0u, caps.preferredVectorWidthDouble);
EXPECT_FALSE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
}
}
}
bool expectedFp64Support = ((overrideDefaultFP64Settings == -1) ? isFp64SupportedByHw : overrideDefaultFP64Settings);
if (expectedFp64Support) {
EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
EXPECT_NE(0u, caps.doubleFpConfig);
EXPECT_EQ(1u, fp64FeaturesCount);
EXPECT_NE(0u, caps.nativeVectorWidthDouble);
EXPECT_NE(0u, caps.preferredVectorWidthDouble);
EXPECT_TRUE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
} else {
EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
EXPECT_EQ(0u, caps.doubleFpConfig);
EXPECT_EQ(0u, fp64FeaturesCount);
EXPECT_EQ(0u, caps.nativeVectorWidthDouble);
EXPECT_EQ(0u, caps.preferredVectorWidthDouble);
EXPECT_FALSE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
}
}
}
}

View File

@ -897,6 +897,7 @@ cl_device_info deviceInfoParams[] = {
CL_DEVICE_IL_VERSION,
// NOT_SUPPORTED
// CL_DEVICE_TERMINATE_CAPABILITY_KHR,
CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
CL_DEVICE_DOUBLE_FP_CONFIG,
CL_DEVICE_ENDIAN_LITTLE,
CL_DEVICE_ERROR_CORRECTION_SUPPORT,
@ -908,6 +909,7 @@ cl_device_info deviceInfoParams[] = {
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
CL_DEVICE_GLOBAL_MEM_SIZE,
CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
CL_DEVICE_HALF_FP_CONFIG,
CL_DEVICE_HOST_UNIFIED_MEMORY,
CL_DEVICE_IMAGE_SUPPORT,
@ -974,6 +976,7 @@ cl_device_info deviceInfoParams[] = {
CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
CL_DEVICE_REFERENCE_COUNT,
CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
CL_DEVICE_SINGLE_FP_CONFIG,
CL_DEVICE_SPIR_VERSIONS,
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,

View File

@ -64,6 +64,7 @@ std::string getExtensionsList(const HardwareInfo &hwInfo) {
}
allExtensionsList += "cl_intel_spirv_subgroups ";
allExtensionsList += "cl_khr_spirv_no_integer_wrap_decoration ";
allExtensionsList += "cl_ext_float_atomics ";
}
if (hwInfo.capabilityTable.ftrSupportsFP64) {
@ -128,6 +129,32 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_subgroups");
openclCFeatures.push_back(openClCFeature);
if (hwInfo.capabilityTable.supportsFloatAtomics) {
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_global_atomic_add");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_local_atomic_add");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_global_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp32_local_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp16_global_atomic_load_store");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp16_local_atomic_load_store");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp16_global_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp16_local_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
}
}
auto forcePipeSupport = DebugManager.flags.ForcePipeSupport.get();
@ -142,6 +169,20 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
(forceFp64Support == 1)) {
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_fp64");
openclCFeatures.push_back(openClCFeature);
if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_global_atomic_add");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_local_atomic_add");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_global_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_ext_fp64_local_atomic_min_max");
openclCFeatures.push_back(openClCFeature);
}
}
}

View File

@ -13,7 +13,7 @@
#include <string>
using OpenClCFeaturesContainer = StackVec<cl_name_version, 15>;
using OpenClCFeaturesContainer = StackVec<cl_name_version, 27>;
namespace NEO {
struct HardwareInfo;

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable EHL::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable EHL::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable ICLLP::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable ICLLP::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable LKF::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable LKF::workaroundTable = {};

View File

@ -78,7 +78,9 @@ const RuntimeCapabilityTable ADLN::capabilityTable{
true, // supportsMediaBlock
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
true // fusedEuEnabled
true, // fusedEuEnabled
false, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable ADLN::workaroundTable = {};

View File

@ -79,7 +79,8 @@ const RuntimeCapabilityTable ADLP::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
true, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable ADLP::workaroundTable = {};

View File

@ -79,7 +79,8 @@ const RuntimeCapabilityTable ADLS::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
true, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable ADLS::workaroundTable = {};

View File

@ -80,6 +80,7 @@ const RuntimeCapabilityTable DG1::capabilityTable{
false, // p2pAtomicAccessSupported
true, // fusedEuEnabled
true, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable DG1::workaroundTable = {};

View File

@ -80,6 +80,7 @@ const RuntimeCapabilityTable RKL::capabilityTable{
false, // p2pAtomicAccessSupported
true, // fusedEuEnabled
false, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable RKL::workaroundTable = {};

View File

@ -80,6 +80,7 @@ const RuntimeCapabilityTable TGLLP::capabilityTable{
false, // p2pAtomicAccessSupported
true, // fusedEuEnabled
false, // l0DebuggerSupported;
true // supportsFloatAtomics
};
WorkaroundTable TGLLP::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable BDW::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable BDW::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable BXT::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable BXT::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable CFL::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable CFL::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable GLK::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable GLK::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable KBL::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable KBL::workaroundTable = {};

View File

@ -76,7 +76,8 @@ const RuntimeCapabilityTable SKL::capabilityTable{
false, // p2pAccessSupported
false, // p2pAtomicAccessSupported
false, // fusedEuEnabled
false // l0DebuggerSupported;
false, // l0DebuggerSupported;
false // supportsFloatAtomics
};
WorkaroundTable SKL::workaroundTable = {};

View File

@ -64,6 +64,7 @@ struct RuntimeCapabilityTable {
bool p2pAtomicAccessSupported;
bool fusedEuEnabled;
bool l0DebuggerSupported;
bool supportsFloatAtomics;
};
inline bool operator==(const RuntimeCapabilityTable &lhs, const RuntimeCapabilityTable &rhs) {
@ -128,6 +129,7 @@ inline bool operator==(const RuntimeCapabilityTable &lhs, const RuntimeCapabilit
result &= (lhs.supportsMediaBlock == rhs.supportsMediaBlock);
result &= (lhs.fusedEuEnabled == rhs.fusedEuEnabled);
result &= (lhs.l0DebuggerSupported == rhs.l0DebuggerSupported);
result &= (lhs.supportsFloatAtomics == rhs.supportsFloatAtomics);
return result;
}