Add support for cl_ext_float_atomics
Resolves: NEO-6596
Signed-off-by: Kacper Kasper <kacper.k.kasper@intel.com>
This commit is contained in:
parent
674691d170
commit
aece8195eb
|
@ -341,4 +341,32 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel;
|
|||
|
||||
// cl_intel_variable_eu_thread_count
|
||||
#define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder
|
||||
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
|
||||
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
|
||||
|
||||
/*************************************************
|
||||
* cl_ext_float_atomics extension *
|
||||
*************************************************/
|
||||
|
||||
#if !defined(cl_ext_float_atomics)
|
||||
|
||||
#define cl_ext_float_atomics 1
|
||||
|
||||
#define CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT 0x4231
|
||||
#define CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT 0x4232
|
||||
#define CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT 0x4233
|
||||
|
||||
typedef cl_bitfield cl_device_fp_atomic_capabilities_ext;
|
||||
|
||||
#define CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT (1 << 0)
|
||||
#define CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT (1 << 1)
|
||||
#define CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT (1 << 2)
|
||||
|
||||
/* bits 3 - 15 are currently unused */
|
||||
|
||||
#define CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT (1 << 16)
|
||||
#define CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT (1 << 17)
|
||||
#define CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT (1 << 18)
|
||||
|
||||
/* bits 19 and beyond are currently unused */
|
||||
|
||||
#endif
|
||||
|
|
|
@ -40,6 +40,9 @@ static constexpr cl_device_fp_config defaultFpFlags = static_cast<cl_device_fp_c
|
|||
CL_FP_DENORM |
|
||||
CL_FP_FMA);
|
||||
|
||||
// Baseline cl_ext_float_atomics mask: only the LOAD_STORE bit of the GLOBAL and LOCAL groups.
static constexpr cl_device_fp_atomic_capabilities_ext defaultFpAtomicCapabilities{
    static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT |
                                                      CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT)};
|
||||
|
||||
void ClDevice::setupFp64Flags() {
|
||||
auto &hwInfo = getHardwareInfo();
|
||||
|
||||
|
@ -167,6 +170,27 @@ void ClDevice::initializeCaps() {
|
|||
}
|
||||
}
|
||||
|
||||
if (enabledClVersion >= 20) {
    deviceExtensions += "cl_ext_float_atomics ";

    // ADD and MIN_MAX on both the GLOBAL and LOCAL bits; shared by the fp32 and fp64 masks.
    const auto addAndMinMaxCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(
        CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
        CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);

    // The capabilities beyond load/store are reported only when both conditions hold.
    const bool extendedFloatAtomics = ocl21FeaturesEnabled && hwInfo.capabilityTable.supportsFloatAtomics;

    // fp32 always gets the baseline; fp16 starts empty.
    deviceInfo.singleFpAtomicCapabilities = defaultFpAtomicCapabilities;
    deviceInfo.halfFpAtomicCapabilities = 0;
    if (extendedFloatAtomics) {
        deviceInfo.singleFpAtomicCapabilities |= addAndMinMaxCaps;
        // fp16 gets LOAD_STORE and MIN_MAX, but no ADD bit.
        deviceInfo.halfFpAtomicCapabilities |= static_cast<cl_device_fp_atomic_capabilities_ext>(
            CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
            CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
    }

    // fp64 stays at zero unless a double FP config is reported at all.
    cl_device_fp_atomic_capabilities_ext fp64AtomicCaps = 0;
    if (deviceInfo.doubleFpConfig != 0u) {
        if (hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics || hwInfo.capabilityTable.supportsFloatAtomics) {
            fp64AtomicCaps |= defaultFpAtomicCapabilities;
        }
        if (extendedFloatAtomics) {
            fp64AtomicCaps |= addAndMinMaxCaps;
        }
    }
    deviceInfo.doubleFpAtomicCapabilities = fp64AtomicCaps;
}
|
||||
|
||||
if (DebugManager.flags.EnableNV12.get() && hwInfo.capabilityTable.supportsImages) {
|
||||
deviceExtensions += "cl_intel_planar_yuv ";
|
||||
deviceInfo.nv12Extension = true;
|
||||
|
|
|
@ -87,6 +87,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
|
|||
case CL_DEVICE_COMPILER_AVAILABLE: getCap<CL_DEVICE_COMPILER_AVAILABLE >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_DOUBLE_FP_CONFIG: getCap<CL_DEVICE_DOUBLE_FP_CONFIG >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_DRIVER_VERSION_INTEL: getCap<CL_DEVICE_DRIVER_VERSION_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_ENDIAN_LITTLE: getCap<CL_DEVICE_ENDIAN_LITTLE >(src, srcSize, retSize); break;
|
||||
|
@ -99,6 +100,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
|
|||
case CL_DEVICE_GLOBAL_MEM_CACHE_TYPE: getCap<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_GLOBAL_MEM_SIZE: getCap<CL_DEVICE_GLOBAL_MEM_SIZE >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: getCap<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_HALF_FP_CONFIG: getCap<CL_DEVICE_HALF_FP_CONFIG >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_HOST_UNIFIED_MEMORY: getCap<CL_DEVICE_HOST_UNIFIED_MEMORY >(src, srcSize, retSize); break;
|
||||
|
@ -166,6 +168,7 @@ cl_int ClDevice::getDeviceInfo(cl_device_info paramName,
|
|||
case CL_DEVICE_QUEUE_ON_HOST_PROPERTIES: getCap<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL: getCap<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT: getCap<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SINGLE_FP_CONFIG: getCap<CL_DEVICE_SINGLE_FP_CONFIG >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SLICE_COUNT_INTEL: getCap<CL_DEVICE_SLICE_COUNT_INTEL >(src, srcSize, retSize); break;
|
||||
case CL_DEVICE_SPIR_VERSIONS: getStr<CL_DEVICE_SPIR_VERSIONS >(src, srcSize, retSize); break;
|
||||
|
|
|
@ -76,6 +76,9 @@ struct ClDeviceInfo {
|
|||
cl_bool independentForwardProgress;
|
||||
cl_device_atomic_capabilities atomicMemoryCapabilities;
|
||||
cl_device_atomic_capabilities atomicFenceCapabilities;
|
||||
cl_device_fp_atomic_capabilities_ext singleFpAtomicCapabilities;
|
||||
cl_device_fp_atomic_capabilities_ext halfFpAtomicCapabilities;
|
||||
cl_device_fp_atomic_capabilities_ext doubleFpAtomicCapabilities;
|
||||
cl_bool nonUniformWorkGroupSupport;
|
||||
cl_bool workGroupCollectiveFunctionsSupport;
|
||||
cl_bool genericAddressSpaceSupport;
|
||||
|
|
|
@ -88,6 +88,7 @@ template<> struct Map<CL_DEVICE_COMPILER_AVAILABLE > :
|
|||
template<> struct Map<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_CROSS_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::crossDeviceSharedMemCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES > : public ClMapBase<CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, uint64_t, &ClDeviceInfo::deviceEnqueueSupport> {};
|
||||
template<> struct Map<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::deviceMemCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::doubleFpAtomicCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_DOUBLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_DOUBLE_FP_CONFIG, uint64_t, &ClDeviceInfo::doubleFpConfig> {};
|
||||
template<> struct Map<CL_DEVICE_DRIVER_VERSION_INTEL > : public ClMapBase<CL_DEVICE_DRIVER_VERSION_INTEL, uint32_t, &ClDeviceInfo::internalDriverVersion> {};
|
||||
template<> struct Map<CL_DEVICE_ENDIAN_LITTLE > : public ClMapBase<CL_DEVICE_ENDIAN_LITTLE, uint32_t, &ClDeviceInfo::endianLittle> {};
|
||||
|
@ -97,6 +98,7 @@ template<> struct Map<CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT > :
|
|||
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, uint64_t, &ClDeviceInfo::globalMemCacheSize> {};
|
||||
template<> struct Map<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE > : public ClMapBase<CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, uint32_t, &ClDeviceInfo::globalMemCacheType> {};
|
||||
template<> struct Map<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE > : public ClMapBase<CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_t, &ClDeviceInfo::globalVariablePreferredTotalSize> {};
|
||||
template<> struct Map<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::halfFpAtomicCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_HALF_FP_CONFIG > : public ClMapBase<CL_DEVICE_HALF_FP_CONFIG, uint64_t, &ClDeviceInfo::halfFpConfig> {};
|
||||
template<> struct Map<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::hostMemCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_HOST_UNIFIED_MEMORY > : public ClMapBase<CL_DEVICE_HOST_UNIFIED_MEMORY, uint32_t, &ClDeviceInfo::hostUnifiedMemory> {};
|
||||
|
@ -161,6 +163,7 @@ template<> struct Map<CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES > :
|
|||
template<> struct Map<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES > : public ClMapBase<CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, uint64_t, &ClDeviceInfo::queueOnHostProperties> {};
|
||||
template<> struct Map<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SHARED_SYSTEM_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::sharedSystemMemCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL > : public ClMapBase<CL_DEVICE_SINGLE_DEVICE_SHARED_MEM_CAPABILITIES_INTEL, uint64_t, &ClDeviceInfo::singleDeviceSharedMemCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT > : public ClMapBase<CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT, uint64_t, &ClDeviceInfo::singleFpAtomicCapabilities> {};
|
||||
template<> struct Map<CL_DEVICE_SINGLE_FP_CONFIG > : public ClMapBase<CL_DEVICE_SINGLE_FP_CONFIG, uint64_t, &ClDeviceInfo::singleFpConfig> {};
|
||||
template<> struct Map<CL_DEVICE_SLICE_COUNT_INTEL > : public ClMapBase<CL_DEVICE_SLICE_COUNT_INTEL, size_t, &ClDeviceInfo::maxSliceCount> {};
|
||||
template<> struct Map<CL_DEVICE_SPIR_VERSIONS > : public ClMapBase<CL_DEVICE_SPIR_VERSIONS, const char *, &ClDeviceInfo::spirVersions> {};
|
||||
|
|
|
@ -92,6 +92,16 @@ struct DeviceGetCapsTest : public ::testing::Test {
|
|||
EXPECT_STREQ("__opencl_c_program_scope_global_variables", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_work_group_collective_functions", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_subgroups", (++openclCFeatureIterator)->name);
|
||||
if (hwInfo.capabilityTable.supportsFloatAtomics) {
|
||||
EXPECT_STREQ("__opencl_c_ext_fp32_global_atomic_add", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp32_local_atomic_add", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp32_global_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp32_local_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp16_global_atomic_load_store", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp16_local_atomic_load_store", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp16_global_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp16_local_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
}
|
||||
}
|
||||
if (hwInfo.capabilityTable.supportsDeviceEnqueue) {
|
||||
EXPECT_STREQ("__opencl_c_device_enqueue", (++openclCFeatureIterator)->name);
|
||||
|
@ -101,6 +111,12 @@ struct DeviceGetCapsTest : public ::testing::Test {
|
|||
}
|
||||
if (hwInfo.capabilityTable.ftrSupportsFP64) {
|
||||
EXPECT_STREQ("__opencl_c_fp64", (++openclCFeatureIterator)->name);
|
||||
if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
|
||||
EXPECT_STREQ("__opencl_c_ext_fp64_global_atomic_add", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp64_local_atomic_add", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp64_global_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
EXPECT_STREQ("__opencl_c_ext_fp64_local_atomic_min_max", (++openclCFeatureIterator)->name);
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_EQ(clDevice.getDeviceInfo().openclCFeatures.end(), ++openclCFeatureIterator);
|
||||
|
@ -738,6 +754,24 @@ TEST_F(DeviceGetCapsTest, WhenCheckingFp64ThenResultIsConsistentWithHardwareCapa
|
|||
}
|
||||
}
|
||||
|
||||
// With the OpenCL version forced to 2.0, the device extension string must list cl_ext_float_atomics.
TEST_F(DeviceGetCapsTest, givenOpenCLVersion20WhenCapsAreCreatedThenFloatAtomicsExtensionIsReported) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ForceOCLVersion.set(20);

    auto rootDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get());
    auto clDevice = std::make_unique<MockClDevice>(rootDevice);

    const std::string floatAtomicsExtension{"cl_ext_float_atomics"};
    EXPECT_TRUE(hasSubstr(clDevice->getDeviceInfo().deviceExtensions, floatAtomicsExtension));
}
|
||||
|
||||
// With the OpenCL version forced to 1.2, cl_ext_float_atomics must be absent from the extension string.
TEST_F(DeviceGetCapsTest, givenOpenCLVersion12WhenCapsAreCreatedThenDeviceDoesntReportFloatAtomicsExtension) {
    DebugManagerStateRestore stateRestore;
    DebugManager.flags.ForceOCLVersion.set(12);

    auto rootDevice = MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get());
    auto clDevice = std::make_unique<MockClDevice>(rootDevice);

    const std::string floatAtomicsExtension{"cl_ext_float_atomics"};
    EXPECT_FALSE(hasSubstr(clDevice->getDeviceInfo().deviceExtensions, floatAtomicsExtension));
}
|
||||
|
||||
TEST_F(DeviceGetCapsTest, givenEnableAdvancedVmeSetToTrueAndDeviceDoesNotSupportVmeWhenCapsAreCreatedThenDeviceReportAdvancedVmeExtensionAndBuiltins) {
|
||||
DebugManagerStateRestore dbgRestorer;
|
||||
DebugManager.flags.EnableIntelAdvancedVme.set(1);
|
||||
|
@ -972,38 +1006,68 @@ TEST_F(DeviceGetCapsTest, givenFp64SupportForcedWhenCheckingFp64SupportThenFp64I
|
|||
auto hwInfo = *defaultHwInfo;
|
||||
|
||||
for (auto isFp64SupportedByHw : ::testing::Bool()) {
|
||||
hwInfo.capabilityTable.ftrSupportsFP64 = isFp64SupportedByHw;
|
||||
hwInfo.capabilityTable.ftrSupports64BitMath = isFp64SupportedByHw;
|
||||
for (auto isInteger64BitAtomicsSupportedByHw : ::testing::Bool()) {
|
||||
for (auto isFloatAtomicsSupportedByHw : ::testing::Bool()) {
|
||||
hwInfo.capabilityTable.ftrSupportsInteger64BitAtomics = isInteger64BitAtomicsSupportedByHw;
|
||||
hwInfo.capabilityTable.ftrSupportsFP64 = isFp64SupportedByHw;
|
||||
hwInfo.capabilityTable.ftrSupports64BitMath = isFp64SupportedByHw;
|
||||
hwInfo.capabilityTable.supportsFloatAtomics = isFloatAtomicsSupportedByHw;
|
||||
|
||||
for (auto overrideDefaultFP64Settings : overrideDefaultFP64SettingsValues) {
|
||||
DebugManager.flags.OverrideDefaultFP64Settings.set(overrideDefaultFP64Settings);
|
||||
auto pClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
auto &caps = pClDevice->getDeviceInfo();
|
||||
std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions;
|
||||
for (auto overrideDefaultFP64Settings : overrideDefaultFP64SettingsValues) {
|
||||
DebugManager.flags.OverrideDefaultFP64Settings.set(overrideDefaultFP64Settings);
|
||||
auto pClDevice = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(&hwInfo));
|
||||
auto &caps = pClDevice->getDeviceInfo();
|
||||
std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions;
|
||||
|
||||
size_t fp64FeaturesCount = 0;
|
||||
for (auto &openclCFeature : caps.openclCFeatures) {
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_fp64")) {
|
||||
fp64FeaturesCount++;
|
||||
size_t fp64FeaturesCount = 0;
|
||||
for (auto &openclCFeature : caps.openclCFeatures) {
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_fp64")) {
|
||||
fp64FeaturesCount++;
|
||||
}
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_global_atomic_add")) {
|
||||
fp64FeaturesCount++;
|
||||
}
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_local_atomic_add")) {
|
||||
fp64FeaturesCount++;
|
||||
}
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_global_atomic_min_max")) {
|
||||
fp64FeaturesCount++;
|
||||
}
|
||||
if (0 == strcmp(openclCFeature.name, "__opencl_c_ext_fp64_local_atomic_min_max")) {
|
||||
fp64FeaturesCount++;
|
||||
}
|
||||
}
|
||||
|
||||
bool expectedFp64Support = ((overrideDefaultFP64Settings == -1) ? isFp64SupportedByHw : overrideDefaultFP64Settings);
|
||||
if (expectedFp64Support) {
|
||||
const size_t expectedFp64FeaturesCount = hwInfo.capabilityTable.supportsOcl21Features && isFloatAtomicsSupportedByHw ? 5u : 1u;
|
||||
EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
|
||||
EXPECT_NE(0u, caps.doubleFpConfig);
|
||||
if (hwInfo.capabilityTable.supportsOcl21Features && isFloatAtomicsSupportedByHw) {
|
||||
const cl_device_fp_atomic_capabilities_ext expectedFpCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_GLOBAL_FP_ATOMIC_MIN_MAX_EXT |
|
||||
CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_ADD_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_MIN_MAX_EXT);
|
||||
EXPECT_EQ(expectedFpCaps, caps.doubleFpAtomicCapabilities);
|
||||
} else if (isFloatAtomicsSupportedByHw || isInteger64BitAtomicsSupportedByHw) {
|
||||
const cl_device_fp_atomic_capabilities_ext expectedFpCaps = static_cast<cl_device_fp_atomic_capabilities_ext>(CL_DEVICE_GLOBAL_FP_ATOMIC_LOAD_STORE_EXT | CL_DEVICE_LOCAL_FP_ATOMIC_LOAD_STORE_EXT);
|
||||
EXPECT_EQ(expectedFpCaps, caps.doubleFpAtomicCapabilities);
|
||||
} else {
|
||||
EXPECT_EQ(0u, caps.doubleFpAtomicCapabilities);
|
||||
}
|
||||
EXPECT_EQ(expectedFp64FeaturesCount, fp64FeaturesCount);
|
||||
EXPECT_NE(0u, caps.nativeVectorWidthDouble);
|
||||
EXPECT_NE(0u, caps.preferredVectorWidthDouble);
|
||||
EXPECT_TRUE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
|
||||
} else {
|
||||
EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
|
||||
EXPECT_EQ(0u, caps.doubleFpConfig);
|
||||
EXPECT_EQ(0u, caps.doubleFpAtomicCapabilities);
|
||||
EXPECT_EQ(0u, fp64FeaturesCount);
|
||||
EXPECT_EQ(0u, caps.nativeVectorWidthDouble);
|
||||
EXPECT_EQ(0u, caps.preferredVectorWidthDouble);
|
||||
EXPECT_FALSE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool expectedFp64Support = ((overrideDefaultFP64Settings == -1) ? isFp64SupportedByHw : overrideDefaultFP64Settings);
|
||||
if (expectedFp64Support) {
|
||||
EXPECT_NE(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
|
||||
EXPECT_NE(0u, caps.doubleFpConfig);
|
||||
EXPECT_EQ(1u, fp64FeaturesCount);
|
||||
EXPECT_NE(0u, caps.nativeVectorWidthDouble);
|
||||
EXPECT_NE(0u, caps.preferredVectorWidthDouble);
|
||||
EXPECT_TRUE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
|
||||
} else {
|
||||
EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64")));
|
||||
EXPECT_EQ(0u, caps.doubleFpConfig);
|
||||
EXPECT_EQ(0u, fp64FeaturesCount);
|
||||
EXPECT_EQ(0u, caps.nativeVectorWidthDouble);
|
||||
EXPECT_EQ(0u, caps.preferredVectorWidthDouble);
|
||||
EXPECT_FALSE(isValueSet(caps.singleFpConfig, CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -897,6 +897,7 @@ cl_device_info deviceInfoParams[] = {
|
|||
CL_DEVICE_IL_VERSION,
|
||||
// NOT_SUPPORTED
|
||||
// CL_DEVICE_TERMINATE_CAPABILITY_KHR,
|
||||
CL_DEVICE_DOUBLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
CL_DEVICE_DOUBLE_FP_CONFIG,
|
||||
CL_DEVICE_ENDIAN_LITTLE,
|
||||
CL_DEVICE_ERROR_CORRECTION_SUPPORT,
|
||||
|
@ -908,6 +909,7 @@ cl_device_info deviceInfoParams[] = {
|
|||
CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
|
||||
CL_DEVICE_GLOBAL_MEM_SIZE,
|
||||
CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE,
|
||||
CL_DEVICE_HALF_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
CL_DEVICE_HALF_FP_CONFIG,
|
||||
CL_DEVICE_HOST_UNIFIED_MEMORY,
|
||||
CL_DEVICE_IMAGE_SUPPORT,
|
||||
|
@ -974,6 +976,7 @@ cl_device_info deviceInfoParams[] = {
|
|||
CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES,
|
||||
CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
|
||||
CL_DEVICE_REFERENCE_COUNT,
|
||||
CL_DEVICE_SINGLE_FP_ATOMIC_CAPABILITIES_EXT,
|
||||
CL_DEVICE_SINGLE_FP_CONFIG,
|
||||
CL_DEVICE_SPIR_VERSIONS,
|
||||
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
|
||||
|
|
|
@ -64,6 +64,7 @@ std::string getExtensionsList(const HardwareInfo &hwInfo) {
|
|||
}
|
||||
allExtensionsList += "cl_intel_spirv_subgroups ";
|
||||
allExtensionsList += "cl_khr_spirv_no_integer_wrap_decoration ";
|
||||
allExtensionsList += "cl_ext_float_atomics ";
|
||||
}
|
||||
|
||||
if (hwInfo.capabilityTable.ftrSupportsFP64) {
|
||||
|
@ -128,6 +129,32 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
|
|||
|
||||
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_subgroups");
|
||||
openclCFeatures.push_back(openClCFeature);
|
||||
|
||||
if (hwInfo.capabilityTable.supportsFloatAtomics) {
    // Registered in array order; the unit test walks the container and expects exactly this sequence.
    constexpr const char *fp32AndFp16AtomicFeatures[] = {
        "__opencl_c_ext_fp32_global_atomic_add",
        "__opencl_c_ext_fp32_local_atomic_add",
        "__opencl_c_ext_fp32_global_atomic_min_max",
        "__opencl_c_ext_fp32_local_atomic_min_max",
        "__opencl_c_ext_fp16_global_atomic_load_store",
        "__opencl_c_ext_fp16_local_atomic_load_store",
        "__opencl_c_ext_fp16_global_atomic_min_max",
        "__opencl_c_ext_fp16_local_atomic_min_max",
    };

    for (const char *featureName : fp32AndFp16AtomicFeatures) {
        // Overwrite only the name of the shared entry, then append a copy of it.
        strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, featureName);
        openclCFeatures.push_back(openClCFeature);
    }
}
|
||||
}
|
||||
|
||||
auto forcePipeSupport = DebugManager.flags.ForcePipeSupport.get();
|
||||
|
@ -142,6 +169,20 @@ void getOpenclCFeaturesList(const HardwareInfo &hwInfo, OpenClCFeaturesContainer
|
|||
(forceFp64Support == 1)) {
|
||||
strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, "__opencl_c_fp64");
|
||||
openclCFeatures.push_back(openClCFeature);
|
||||
|
||||
if (hwInfo.capabilityTable.supportsOcl21Features && hwInfo.capabilityTable.supportsFloatAtomics) {
    // Registered in array order, directly after the "__opencl_c_fp64" entry pushed just before this block.
    constexpr const char *fp64AtomicFeatures[] = {
        "__opencl_c_ext_fp64_global_atomic_add",
        "__opencl_c_ext_fp64_local_atomic_add",
        "__opencl_c_ext_fp64_global_atomic_min_max",
        "__opencl_c_ext_fp64_local_atomic_min_max",
    };

    for (const char *featureName : fp64AtomicFeatures) {
        // Overwrite only the name of the shared entry, then append a copy of it.
        strcpy_s(openClCFeature.name, CL_NAME_VERSION_MAX_NAME_SIZE, featureName);
        openclCFeatures.push_back(openClCFeature);
    }
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,7 +13,7 @@
|
|||
|
||||
#include <string>
|
||||
|
||||
using OpenClCFeaturesContainer = StackVec<cl_name_version, 15>;
|
||||
using OpenClCFeaturesContainer = StackVec<cl_name_version, 27>;
|
||||
|
||||
namespace NEO {
|
||||
struct HardwareInfo;
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable EHL::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable EHL::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable ICLLP::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable ICLLP::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable LKF::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable LKF::workaroundTable = {};
|
||||
|
|
|
@ -78,7 +78,9 @@ const RuntimeCapabilityTable ADLN::capabilityTable{
|
|||
true, // supportsMediaBlock
|
||||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
true // fusedEuEnabled
|
||||
true, // fusedEuEnabled
|
||||
false, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable ADLN::workaroundTable = {};
|
||||
|
|
|
@ -79,7 +79,8 @@ const RuntimeCapabilityTable ADLP::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
true, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable ADLP::workaroundTable = {};
|
||||
|
|
|
@ -79,7 +79,8 @@ const RuntimeCapabilityTable ADLS::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
true, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable ADLS::workaroundTable = {};
|
||||
|
|
|
@ -80,6 +80,7 @@ const RuntimeCapabilityTable DG1::capabilityTable{
|
|||
false, // p2pAtomicAccessSupported
|
||||
true, // fusedEuEnabled
|
||||
true, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable DG1::workaroundTable = {};
|
||||
|
|
|
@ -80,6 +80,7 @@ const RuntimeCapabilityTable RKL::capabilityTable{
|
|||
false, // p2pAtomicAccessSupported
|
||||
true, // fusedEuEnabled
|
||||
false, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable RKL::workaroundTable = {};
|
||||
|
|
|
@ -80,6 +80,7 @@ const RuntimeCapabilityTable TGLLP::capabilityTable{
|
|||
false, // p2pAtomicAccessSupported
|
||||
true, // fusedEuEnabled
|
||||
false, // l0DebuggerSupported;
|
||||
true // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable TGLLP::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable BDW::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable BDW::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable BXT::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable BXT::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable CFL::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable CFL::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable GLK::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable GLK::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable KBL::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable KBL::workaroundTable = {};
|
||||
|
|
|
@ -76,7 +76,8 @@ const RuntimeCapabilityTable SKL::capabilityTable{
|
|||
false, // p2pAccessSupported
|
||||
false, // p2pAtomicAccessSupported
|
||||
false, // fusedEuEnabled
|
||||
false // l0DebuggerSupported;
|
||||
false, // l0DebuggerSupported;
|
||||
false // supportsFloatAtomics
|
||||
};
|
||||
|
||||
WorkaroundTable SKL::workaroundTable = {};
|
||||
|
|
|
@ -64,6 +64,7 @@ struct RuntimeCapabilityTable {
|
|||
bool p2pAtomicAccessSupported;
|
||||
bool fusedEuEnabled;
|
||||
bool l0DebuggerSupported;
|
||||
bool supportsFloatAtomics;
|
||||
};
|
||||
|
||||
inline bool operator==(const RuntimeCapabilityTable &lhs, const RuntimeCapabilityTable &rhs) {
|
||||
|
@ -128,6 +129,7 @@ inline bool operator==(const RuntimeCapabilityTable &lhs, const RuntimeCapabilit
|
|||
result &= (lhs.supportsMediaBlock == rhs.supportsMediaBlock);
|
||||
result &= (lhs.fusedEuEnabled == rhs.fusedEuEnabled);
|
||||
result &= (lhs.l0DebuggerSupported == rhs.l0DebuggerSupported);
|
||||
result &= (lhs.supportsFloatAtomics == rhs.supportsFloatAtomics);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue