diff --git a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 3f80513852..059113c456 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -688,6 +688,11 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_module_properties_t *pKerne pKernelProperties->fp64flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; pKernelProperties->fp32flags |= ZE_DEVICE_FP_FLAG_ROUNDED_DIVIDE_SQRT; } + } else if (hardwareInfo.capabilityTable.ftrSupportsFP64Emulation) { + if (neoDevice->getExecutionEnvironment()->isFP64EmulationEnabled()) { + pKernelProperties->flags |= ZE_DEVICE_MODULE_FLAG_FP64; + pKernelProperties->fp64flags = defaultFpFlags | ZE_DEVICE_FP_FLAG_SOFT_FLOAT; + } } } diff --git a/level_zero/core/source/driver/driver.cpp b/level_zero/core/source/driver/driver.cpp index ad17bdb7a3..d855b1e92e 100644 --- a/level_zero/core/source/driver/driver.cpp +++ b/level_zero/core/source/driver/driver.cpp @@ -43,6 +43,8 @@ void DriverImp::initialize(ze_result_t *result) { envReader.getSetting("ZES_ENABLE_SYSMAN", false); envVariables.pciIdDeviceOrder = envReader.getSetting("ZE_ENABLE_PCI_ID_DEVICE_ORDER", false); + envVariables.fp64Emulation = + envReader.getSetting("NEO_FP64_EMULATION", false); auto executionEnvironment = new NEO::ExecutionEnvironment(); UNRECOVERABLE_IF(nullptr == executionEnvironment); @@ -53,6 +55,10 @@ void DriverImp::initialize(ze_result_t *result) { } } + if (envVariables.fp64Emulation) { + executionEnvironment->setFP64EmulationEnabled(); + } + executionEnvironment->setMetricsEnabled(envVariables.metrics); executionEnvironment->incRefInternal(); diff --git a/level_zero/core/source/driver/driver_imp.h b/level_zero/core/source/driver/driver_imp.h index f20a517b68..76d68067a2 100644 --- a/level_zero/core/source/driver/driver_imp.h +++ b/level_zero/core/source/driver/driver_imp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 
2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ struct L0EnvVariables { bool pin; bool sysman; bool pciIdDeviceOrder; + bool fp64Emulation; }; } // namespace L0 diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index 21e5c70b07..3050412d1e 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -63,6 +63,7 @@ NEO::ConstStringRef optLargeRegisterFile = "-ze-opt-large-register-file"; NEO::ConstStringRef optAutoGrf = "-ze-intel-enable-auto-large-GRF-mode"; NEO::ConstStringRef enableLibraryCompile = "-library-compilation"; NEO::ConstStringRef enableGlobalVariableSymbols = "-ze-take-global-address"; +NEO::ConstStringRef enableFP64GenEmu = "-ze-fp64-gen-emu"; } // namespace BuildOptions ModuleTranslationUnit::ModuleTranslationUnit(L0::Device *device) @@ -143,6 +144,10 @@ std::string ModuleTranslationUnit::generateCompilerOptions(const char *buildOpti internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::debugKernelEnable); } + if (neoDevice.getExecutionEnvironment()->isFP64EmulationEnabled()) { + internalOptions = NEO::CompilerOptions::concatenate(internalOptions, BuildOptions::enableFP64GenEmu); + } + const auto &compilerProductHelper = neoDevice.getRootDeviceEnvironment().getHelper(); auto forceToStatelessRequired = compilerProductHelper.isForceToStatelessRequired(); auto statelessToStatefulOptimizationDisabled = NEO::DebugManager.flags.DisableStatelessToStatefulOptimization.get(); diff --git a/level_zero/core/source/module/module_imp.h b/level_zero/core/source/module/module_imp.h index 42476c1076..17d7e27e9b 100644 --- a/level_zero/core/source/module/module_imp.h +++ b/level_zero/core/source/module/module_imp.h @@ -40,6 +40,7 @@ extern NEO::ConstStringRef optLargeRegisterFile; extern NEO::ConstStringRef optAutoGrf; extern NEO::ConstStringRef enableLibraryCompile; extern NEO::ConstStringRef 
enableGlobalVariableSymbols; +extern NEO::ConstStringRef enableFP64GenEmu; } // namespace BuildOptions diff --git a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp index 29e8d5e719..904c266fdb 100644 --- a/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp +++ b/level_zero/core/test/unit_tests/sources/device/test_l0_device.cpp @@ -1861,6 +1861,56 @@ TEST_F(DeviceHasNoDoubleFp64Test, givenDeviceThatDoesntHaveFp64WhenDbgFlagEnable EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_FMA); } +struct DeviceHasNoFp64HasFp64EmulationTest : public ::testing::Test { + void SetUp() override { + HardwareInfo fp64EmulationDevice = *defaultHwInfo; + fp64EmulationDevice.capabilityTable.ftrSupportsFP64 = false; + fp64EmulationDevice.capabilityTable.ftrSupportsFP64Emulation = true; + neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment(&fp64EmulationDevice, rootDeviceIndex); + NEO::DeviceVector devices; + devices.push_back(std::unique_ptr(neoDevice)); + driverHandle = std::make_unique>(); + driverHandle->initialize(std::move(devices)); + device = driverHandle->devices[0]; + } + + std::unique_ptr> driverHandle; + NEO::Device *neoDevice = nullptr; + L0::Device *device = nullptr; + const uint32_t rootDeviceIndex = 1u; + const uint32_t numRootDevices = 1u; +}; + +TEST_F(DeviceHasNoFp64HasFp64EmulationTest, givenDefaultFp64EmulationSettingsAndDeviceSupportingFp64EmulationAndWithoutNativeFp64ThenReportCorrectFp64Flags) { + ze_device_module_properties_t kernelProperties = {}; + memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); + kernelProperties.pNext = nullptr; + + device->getKernelProperties(&kernelProperties); + EXPECT_FALSE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); + EXPECT_FALSE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_SOFT_FLOAT); + EXPECT_EQ(0u, kernelProperties.fp64flags); +} + 
+TEST_F(DeviceHasNoFp64HasFp64EmulationTest, givenFp64EmulationEnabledAndDeviceSupportingFp64EmulationAndWithoutNativeFp64ThenReportCorrectFp64Flags) { + neoDevice->getExecutionEnvironment()->setFP64EmulationEnabled(); + ze_device_module_properties_t kernelProperties = {}; + memset(&kernelProperties, std::numeric_limits::max(), sizeof(ze_device_module_properties_t)); + kernelProperties.pNext = nullptr; + + device->getKernelProperties(&kernelProperties); + EXPECT_TRUE(kernelProperties.flags & ZE_DEVICE_MODULE_FLAG_FP64); + EXPECT_TRUE(kernelProperties.fp64flags & ZE_DEVICE_FP_FLAG_SOFT_FLOAT); + + ze_device_fp_flags_t defaultFpFlags = static_cast(ZE_DEVICE_FP_FLAG_ROUND_TO_NEAREST | + ZE_DEVICE_FP_FLAG_ROUND_TO_ZERO | + ZE_DEVICE_FP_FLAG_ROUND_TO_INF | + ZE_DEVICE_FP_FLAG_INF_NAN | + ZE_DEVICE_FP_FLAG_DENORM | + ZE_DEVICE_FP_FLAG_FMA); + EXPECT_EQ(defaultFpFlags, kernelProperties.fp64flags & defaultFpFlags); +} + struct DeviceHasFp64Test : public ::testing::Test { void SetUp() override { DebugManager.flags.CreateMultipleRootDevices.set(numRootDevices); diff --git a/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp b/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp index 985345e380..1372de679e 100644 --- a/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp +++ b/level_zero/core/test/unit_tests/sources/driver/test_driver.cpp @@ -413,6 +413,28 @@ TEST(DriverImpTest, givenEnabledProgramDebuggingWhenCreatingExecutionEnvironment L0::GlobalDriver = nullptr; } +TEST(DriverImpTest, givenEnabledFP64EmulationWhenCreatingExecutionEnvironmentThenFP64EmulationIsEnabled) { + + NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); + hwInfo.capabilityTable.levelZeroSupported = true; + + VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); + std::unordered_map mockableEnvs = {{"NEO_FP64_EMULATION", "1"}}; + VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); + + ze_result_t result = 
ZE_RESULT_ERROR_UNINITIALIZED; + DriverImp driverImp; + driverImp.initialize(&result); + + ASSERT_NE(nullptr, L0::GlobalDriver); + ASSERT_NE(0u, L0::GlobalDriver->numDevices); + EXPECT_TRUE(L0::GlobalDriver->devices[0]->getNEODevice()->getExecutionEnvironment()->isFP64EmulationEnabled()); + + delete L0::GlobalDriver; + L0::GlobalDriverHandle = nullptr; + L0::GlobalDriver = nullptr; +} + TEST(DriverImpTest, givenEnabledProgramDebuggingAndEnabledExperimentalOpenCLWhenCreatingExecutionEnvironmentThenDebuggingEnabledIsFalse) { DebugManagerStateRestore restorer; NEO::HardwareInfo hwInfo = *NEO::defaultHwInfo.get(); diff --git a/level_zero/core/test/unit_tests/sources/module/test_module_2.cpp b/level_zero/core/test/unit_tests/sources/module/test_module_2.cpp index cc26037fb0..bf300e779d 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module_2.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module_2.cpp @@ -375,5 +375,45 @@ TEST_F(ModuleTests, givenDefaultGrfFlagSetWhenCreatingModuleThenOverrideInternal EXPECT_NE(pMockCompilerInterface->inputInternalOptions.find("-cl-intel-128-GRF-per-thread"), std::string::npos); } +TEST_F(ModuleTests, givenFP64EmulationDisabledWhenCreatingModuleThenEnableFP64GenEmuOptionIsNotPresent) { + auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); + device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); + ASSERT_FALSE(device->getNEODevice()->getExecutionEnvironment()->isFP64EmulationEnabled()); + + uint8_t binary[10]; + ze_module_desc_t moduleDesc = {}; + moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; + moduleDesc.pInputModule = binary; + moduleDesc.inputSize = 10; + + ModuleBuildLog *moduleBuildLog = nullptr; + + auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::Builtin)); + ASSERT_NE(nullptr, module.get()); + module->initialize(&moduleDesc, device->getNEODevice()); + + 
EXPECT_FALSE(CompilerOptions::contains(cip->buildInternalOptions, BuildOptions::enableFP64GenEmu)); +}; + +TEST_F(ModuleTests, givenFP64EmulationEnabledWhenCreatingModuleThenEnableFP64GenEmuOptionIsPresent) { + auto cip = new NEO::MockCompilerInterfaceCaptureBuildOptions(); + device->getNEODevice()->getExecutionEnvironment()->rootDeviceEnvironments[device->getRootDeviceIndex()]->compilerInterface.reset(cip); + device->getNEODevice()->getExecutionEnvironment()->setFP64EmulationEnabled(); + + uint8_t binary[10]; + ze_module_desc_t moduleDesc = {}; + moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV; + moduleDesc.pInputModule = binary; + moduleDesc.inputSize = 10; + + ModuleBuildLog *moduleBuildLog = nullptr; + + auto module = std::unique_ptr(new L0::ModuleImp(device, moduleBuildLog, ModuleType::Builtin)); + ASSERT_NE(nullptr, module.get()); + module->initialize(&moduleDesc, device->getNEODevice()); + + EXPECT_TRUE(CompilerOptions::contains(cip->buildInternalOptions, BuildOptions::enableFP64GenEmu)); +}; + } // namespace ult } // namespace L0 diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index 629c9e67bd..4b08af5ee6 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -87,13 +87,16 @@ cl_int CL_API_CALL clGetPlatformIDs(cl_uint numEntries, auto executionEnvironment = new ClExecutionEnvironment(); executionEnvironment->incRefInternal(); + NEO::EnvironmentVariableReader envReader; if (NEO::DebugManager.flags.ExperimentalEnableL0DebuggerForOpenCL.get()) { - NEO::EnvironmentVariableReader envReader; auto programDebugging = envReader.getSetting("ZET_ENABLE_PROGRAM_DEBUGGING", false); if (programDebugging) { executionEnvironment->setDebuggingEnabled(); } } + if (envReader.getSetting("NEO_FP64_EMULATION", false)) { + executionEnvironment->setFP64EmulationEnabled(); + } auto allDevices = DeviceFactory::createDevices(*executionEnvironment); executionEnvironment->decRefInternal(); if (allDevices.empty()) { diff --git 
a/opencl/source/cl_device/cl_device_caps.cpp b/opencl/source/cl_device/cl_device_caps.cpp index 1d5d352271..bd92248f77 100644 --- a/opencl/source/cl_device/cl_device_caps.cpp +++ b/opencl/source/cl_device/cl_device_caps.cpp @@ -7,6 +7,7 @@ #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" +#include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" @@ -59,9 +60,17 @@ void ClDevice::setupFp64Flags() { deviceInfo.nativeVectorWidthDouble = 1; deviceInfo.preferredVectorWidthDouble = 1; } else { - deviceInfo.doubleFpConfig = 0; - deviceInfo.nativeVectorWidthDouble = 0; - deviceInfo.preferredVectorWidthDouble = 0; + // Emulated FP64 is reported only when the platform supports it and it was explicitly enabled; otherwise report no FP64. + const bool fp64EmulationActive = hwInfo.capabilityTable.ftrSupportsFP64Emulation && getDevice().getExecutionEnvironment()->isFP64EmulationEnabled(); + if (fp64EmulationActive) { + deviceInfo.doubleFpConfig = defaultFpFlags | CL_FP_SOFT_FLOAT; + deviceInfo.nativeVectorWidthDouble = 1; + deviceInfo.preferredVectorWidthDouble = 1; + } else { + deviceInfo.doubleFpConfig = 0; + deviceInfo.nativeVectorWidthDouble = 0; + deviceInfo.preferredVectorWidthDouble = 0; + } } deviceInfo.singleFpConfig = static_cast( diff --git a/opencl/source/program/program.cpp b/opencl/source/program/program.cpp index d195032b27..c38bff6b4b 100644 --- a/opencl/source/program/program.cpp +++ b/opencl/source/program/program.cpp @@ -99,6 +99,10 @@ std::string Program::getInternalOptions() const { CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableImageSupport); } + if (pClDevice->getDevice().getExecutionEnvironment()->isFP64EmulationEnabled()) { + CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::enableFP64GenEmu); + } + CompilerOptions::concatenateAppend(internalOptions, CompilerOptions::preserveVec3Type); auto isDebuggerActive = 
pClDevice->getDevice().isDebuggerActive() || pClDevice->getDevice().getDebugger() != nullptr; CompilerOptions::concatenateAppend(internalOptions, compilerProductHelper.getCachingPolicyOptions(isDebuggerActive)); diff --git a/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl b/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl index b9c8c925fb..3b5fa40ac9 100644 --- a/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl +++ b/opencl/test/unit_test/api/cl_get_platform_ids_tests.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2022 Intel Corporation + * Copyright (C) 2018-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -215,4 +215,48 @@ TEST(clGetPlatformIDsTest, givenEnabledExperimentalSupportAndZeroProgramDebuggin platformsImpl->clear(); } + +TEST(clGetPlatformIDsTest, givenEnabledFP64EmulationWhenGettingPlatformIdsThenFP64EmulationIsEnabled) { + VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); + std::unordered_map mockableEnvs = {{"NEO_FP64_EMULATION", "1"}}; + VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); + + cl_int retVal = CL_SUCCESS; + cl_platform_id platformRet = nullptr; + cl_uint numPlatforms = 0; + + platformsImpl->clear(); + + retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); + + EXPECT_EQ(CL_SUCCESS, retVal); + + ASSERT_NE(nullptr, platformsImpl); + auto executionEnvironment = platform()->peekExecutionEnvironment(); + EXPECT_TRUE(executionEnvironment->isFP64EmulationEnabled()); + + platformsImpl->clear(); +} + +TEST(clGetPlatformIDsTest, givenDefaultFP64EmulationStateWhenGettingPlatformIdsThenFP64EmulationIsDisabled) { + VariableBackup mockGetenvCalledBackup(&IoFunctions::mockGetenvCalled, 0); + std::unordered_map mockableEnvs = {{"NEO_FP64_EMULATION", "0"}}; + VariableBackup *> mockableEnvValuesBackup(&IoFunctions::mockableEnvValues, &mockableEnvs); + + cl_int retVal = CL_SUCCESS; + cl_platform_id platformRet = nullptr; + cl_uint numPlatforms = 0; + + 
platformsImpl->clear(); + + retVal = clGetPlatformIDs(1, &platformRet, &numPlatforms); + + EXPECT_EQ(CL_SUCCESS, retVal); + + ASSERT_NE(nullptr, platformsImpl); + auto executionEnvironment = platform()->peekExecutionEnvironment(); + EXPECT_FALSE(executionEnvironment->isFP64EmulationEnabled()); + + platformsImpl->clear(); +} } // namespace ULT diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index 80dec2e32b..c514a778c2 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -1072,6 +1072,49 @@ TEST_F(DeviceGetCapsTest, givenFp64SupportForcedWhenCheckingFp64SupportThenFp64I } } +TEST_F(DeviceGetCapsTest, givenFp64EmulationSupportWithoutFp64EmulationEnvVarWhenCreatingDeviceThenDeviceCapsAreSetCorrectly) { + auto hwInfo = *defaultHwInfo; + + hwInfo.capabilityTable.ftrSupportsFP64 = false; + hwInfo.capabilityTable.ftrSupportsFP64Emulation = true; + + auto executionEnvironment = MockClDevice::prepareExecutionEnvironment(&hwInfo, 0); + auto pClDevice = std::make_unique(MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, 0)); + + auto &caps = pClDevice->getDeviceInfo(); + std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions; + + EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); + EXPECT_FALSE(isValueSet(caps.doubleFpConfig, CL_FP_SOFT_FLOAT)); +} + +TEST_F(DeviceGetCapsTest, givenFp64EmulationSupportWithFp64EmulationEnvVarSetWhenCreatingDeviceThenDeviceCapsAreSetCorrectly) { + auto hwInfo = *defaultHwInfo; + + hwInfo.capabilityTable.ftrSupportsFP64 = false; + hwInfo.capabilityTable.ftrSupportsFP64Emulation = true; + + auto executionEnvironment = MockClDevice::prepareExecutionEnvironment(&hwInfo, 0); + executionEnvironment->setFP64EmulationEnabled(); + auto pClDevice = std::make_unique(MockDevice::createWithExecutionEnvironment(&hwInfo, executionEnvironment, 0)); + + auto 
&caps = pClDevice->getDeviceInfo(); + std::string extensionString = pClDevice->getDeviceInfo().deviceExtensions; + + EXPECT_EQ(std::string::npos, extensionString.find(std::string("cl_khr_fp64"))); + EXPECT_TRUE(isValueSet(caps.doubleFpConfig, CL_FP_SOFT_FLOAT)); + + cl_device_fp_config defaultFpFlags = static_cast(CL_FP_ROUND_TO_NEAREST | + CL_FP_ROUND_TO_ZERO | + CL_FP_ROUND_TO_INF | + CL_FP_INF_NAN | + CL_FP_DENORM | + CL_FP_FMA); + EXPECT_EQ(defaultFpFlags, caps.doubleFpConfig & defaultFpFlags); + EXPECT_EQ(1u, caps.nativeVectorWidthDouble); + EXPECT_EQ(1u, caps.preferredVectorWidthDouble); +} + TEST(DeviceGetCaps, WhenPeekingCompilerExtensionsThenCompilerExtensionsAreReturned) { UltClDeviceFactory deviceFactory{1, 0}; auto pClDevice = deviceFactory.rootDevices[0]; diff --git a/opencl/test/unit_test/program/program_tests.cpp b/opencl/test/unit_test/program/program_tests.cpp index 67f41ee93e..af695e09d6 100644 --- a/opencl/test/unit_test/program/program_tests.cpp +++ b/opencl/test/unit_test/program/program_tests.cpp @@ -1765,6 +1765,20 @@ TEST_F(ProgramTests, Force32BitAddressessWhenProgramIsCreatedThenGreaterThan4gbB } } +TEST_F(ProgramTests, givenFp64EmulationInDefaultStateWhenProgramIsCreatedThenEnableFP64GenEmuBuildOptionIsNotPresent) { + std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; + auto internalOptions = program->getInternalOptions(); + EXPECT_FALSE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::enableFP64GenEmu)) << internalOptions; +} + +TEST_F(ProgramTests, givenFp64EmulationEnabledTheWhenProgramIsCreatedThenEnableFP64GenEmuBuildOptionIsPresent) { + std::unique_ptr program{Program::createBuiltInFromSource("", pContext, pContext->getDevices(), nullptr)}; + ASSERT_FALSE(pDevice->getExecutionEnvironment()->isFP64EmulationEnabled()); + pDevice->getExecutionEnvironment()->setFP64EmulationEnabled(); + auto internalOptions = program->getInternalOptions(); + 
EXPECT_TRUE(CompilerOptions::contains(internalOptions, NEO::CompilerOptions::enableFP64GenEmu)) << internalOptions; +} + TEST_F(ProgramTests, whenContainsStatefulAccessIsCalledThenReturnCorrectResult) { std::vector> testParams = { {false, undefined, undefined}, diff --git a/shared/source/compiler_interface/compiler_options.h b/shared/source/compiler_interface/compiler_options.h index bf4d4481ac..aed1e46bfc 100644 --- a/shared/source/compiler_interface/compiler_options.h +++ b/shared/source/compiler_interface/compiler_options.h @@ -44,6 +44,7 @@ inline constexpr ConstStringRef largeGrf = "-cl-intel-256-GRF-per-thread"; inline constexpr ConstStringRef autoGrf = "-cl-intel-enable-auto-large-GRF-mode"; inline constexpr ConstStringRef numThreadsPerEu = "-cl-intel-reqd-eu-thread-count"; inline constexpr ConstStringRef useCMCompiler = "-cmc"; +inline constexpr ConstStringRef enableFP64GenEmu = "-cl-fp64-gen-emu"; inline constexpr size_t nullterminateSize = 1U; inline constexpr size_t spaceSeparatorSize = 1U; diff --git a/shared/source/execution_environment/execution_environment.h b/shared/source/execution_environment/execution_environment.h index 4de800baaf..8c0ac91cfa 100644 --- a/shared/source/execution_environment/execution_environment.h +++ b/shared/source/execution_environment/execution_environment.h @@ -42,6 +42,10 @@ class ExecutionEnvironment : public ReferenceTrackedObject this->metricsEnabled = value; } bool areMetricsEnabled() { return this->metricsEnabled; } + void setFP64EmulationEnabled() { + fp64EmulationEnabled = true; + } + bool isFP64EmulationEnabled() const { return fp64EmulationEnabled; } DirectSubmissionController *initializeDirectSubmissionController(); std::unique_ptr memoryManager; @@ -56,6 +60,7 @@ class ExecutionEnvironment : public ReferenceTrackedObject void configureNeoEnvironment(); bool debuggingEnabled = false; bool metricsEnabled = false; + bool fp64EmulationEnabled = false; std::unordered_map rootDeviceNumCcsMap; }; } // namespace NEO 
diff --git a/shared/source/gen11/hw_info_ehl.cpp b/shared/source/gen11/hw_info_ehl.cpp index 49ea9c77f2..da6859ddee 100644 --- a/shared/source/gen11/hw_info_ehl.cpp +++ b/shared/source/gen11/hw_info_ehl.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable EHL::capabilityTable{ false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath false, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen11/hw_info_icllp.cpp b/shared/source/gen11/hw_info_icllp.cpp index 763e8a38a8..ccc4ef747e 100644 --- a/shared/source/gen11/hw_info_icllp.cpp +++ b/shared/source/gen11/hw_info_icllp.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable ICLLP::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen11/hw_info_lkf.cpp b/shared/source/gen11/hw_info_lkf.cpp index 6c97c1949d..81eaf98562 100644 --- a/shared/source/gen11/hw_info_lkf.cpp +++ b/shared/source/gen11/hw_info_lkf.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable LKF::capabilityTable{ false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath false, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_adln.cpp b/shared/source/gen12lp/hw_info_adln.cpp index d4583c9d75..ee0a0217f9 100644 --- a/shared/source/gen12lp/hw_info_adln.cpp +++ b/shared/source/gen12lp/hw_info_adln.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable ADLN::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // 
ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_adlp.cpp b/shared/source/gen12lp/hw_info_adlp.cpp index 78a900429f..436a543a66 100644 --- a/shared/source/gen12lp/hw_info_adlp.cpp +++ b/shared/source/gen12lp/hw_info_adlp.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable ADLP::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_adls.cpp b/shared/source/gen12lp/hw_info_adls.cpp index b644a2473f..546edbd95a 100644 --- a/shared/source/gen12lp/hw_info_adls.cpp +++ b/shared/source/gen12lp/hw_info_adls.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable ADLS::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_dg1.cpp b/shared/source/gen12lp/hw_info_dg1.cpp index e37adafdb0..e5fffc8b3c 100644 --- a/shared/source/gen12lp/hw_info_dg1.cpp +++ b/shared/source/gen12lp/hw_info_dg1.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable DG1::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_rkl.cpp b/shared/source/gen12lp/hw_info_rkl.cpp index 236b454362..a004c5204e 100644 --- a/shared/source/gen12lp/hw_info_rkl.cpp +++ b/shared/source/gen12lp/hw_info_rkl.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable RKL::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, 
// ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen12lp/hw_info_tgllp.cpp b/shared/source/gen12lp/hw_info_tgllp.cpp index 58f2143fdb..e19bd0a307 100644 --- a/shared/source/gen12lp/hw_info_tgllp.cpp +++ b/shared/source/gen12lp/hw_info_tgllp.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable TGLLP::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation false, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/gen8/hw_info_bdw.cpp b/shared/source/gen8/hw_info_bdw.cpp index 32a9b6ce68..1e11a415d9 100644 --- a/shared/source/gen8/hw_info_bdw.cpp +++ b/shared/source/gen8/hw_info_bdw.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable BDW::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency diff --git a/shared/source/gen9/hw_info_bxt.cpp b/shared/source/gen9/hw_info_bxt.cpp index b36822d653..4191407a4d 100644 --- a/shared/source/gen9/hw_info_bxt.cpp +++ b/shared/source/gen9/hw_info_bxt.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable BXT::capabilityTable{ false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency diff --git a/shared/source/gen9/hw_info_cfl.cpp b/shared/source/gen9/hw_info_cfl.cpp index ed3cd2e6ad..fc9d0d6598 100644 --- a/shared/source/gen9/hw_info_cfl.cpp +++ b/shared/source/gen9/hw_info_cfl.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable CFL::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm true, // 
ftrSupportsCoherency diff --git a/shared/source/gen9/hw_info_glk.cpp b/shared/source/gen9/hw_info_glk.cpp index 272d372531..7f168181ba 100644 --- a/shared/source/gen9/hw_info_glk.cpp +++ b/shared/source/gen9/hw_info_glk.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable GLK::capabilityTable{ false, // blitterOperationsSupported false, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath false, // ftrSvm true, // ftrSupportsCoherency diff --git a/shared/source/gen9/hw_info_kbl.cpp b/shared/source/gen9/hw_info_kbl.cpp index aaf6f11b98..cdb34d5bfe 100644 --- a/shared/source/gen9/hw_info_kbl.cpp +++ b/shared/source/gen9/hw_info_kbl.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable KBL::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency diff --git a/shared/source/gen9/hw_info_skl.cpp b/shared/source/gen9/hw_info_skl.cpp index 01d73e71d1..c1f3d93c6e 100644 --- a/shared/source/gen9/hw_info_skl.cpp +++ b/shared/source/gen9/hw_info_skl.cpp @@ -51,6 +51,7 @@ const RuntimeCapabilityTable SKL::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm true, // ftrSupportsCoherency diff --git a/shared/source/helpers/hw_info.h b/shared/source/helpers/hw_info.h index 123106276f..924ed21360 100644 --- a/shared/source/helpers/hw_info.h +++ b/shared/source/helpers/hw_info.h @@ -38,6 +38,7 @@ struct RuntimeCapabilityTable { bool blitterOperationsSupported; bool ftrSupportsInteger64BitAtomics; bool ftrSupportsFP64; + bool ftrSupportsFP64Emulation; bool ftrSupports64BitMath; bool ftrSvm; bool ftrSupportsCoherency; @@ -104,6 +105,7 @@ inline bool operator==(const RuntimeCapabilityTable &lhs, 
const RuntimeCapabilit result &= (lhs.blitterOperationsSupported == rhs.blitterOperationsSupported); result &= (lhs.ftrSupportsInteger64BitAtomics == rhs.ftrSupportsInteger64BitAtomics); result &= (lhs.ftrSupportsFP64 == rhs.ftrSupportsFP64); + result &= (lhs.ftrSupportsFP64Emulation == rhs.ftrSupportsFP64Emulation); result &= (lhs.ftrSupports64BitMath == rhs.ftrSupports64BitMath); result &= (lhs.ftrSvm == rhs.ftrSvm); result &= (lhs.ftrSupportsCoherency == rhs.ftrSupportsCoherency); diff --git a/shared/source/xe_hp_core/hw_info_xe_hp_sdv.cpp b/shared/source/xe_hp_core/hw_info_xe_hp_sdv.cpp index 0f678be5e5..6b09d2c0ba 100644 --- a/shared/source/xe_hp_core/hw_info_xe_hp_sdv.cpp +++ b/shared/source/xe_hp_core/hw_info_xe_hp_sdv.cpp @@ -54,6 +54,7 @@ const RuntimeCapabilityTable XE_HP_SDV::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/xe_hpc_core/hw_info_pvc.cpp b/shared/source/xe_hpc_core/hw_info_pvc.cpp index ce6457e796..7c89ce849a 100644 --- a/shared/source/xe_hpc_core/hw_info_pvc.cpp +++ b/shared/source/xe_hpc_core/hw_info_pvc.cpp @@ -65,6 +65,7 @@ const RuntimeCapabilityTable PVC::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/xe_hpg_core/hw_info_dg2.cpp b/shared/source/xe_hpg_core/hw_info_dg2.cpp index 0837a92e0a..a7dda8a4ef 100644 --- a/shared/source/xe_hpg_core/hw_info_dg2.cpp +++ b/shared/source/xe_hpg_core/hw_info_dg2.cpp @@ -57,6 +57,7 @@ const RuntimeCapabilityTable DG2::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics false, // ftrSupportsFP64 + true, // ftrSupportsFP64Emulation true, // 
ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/source/xe_hpg_core/hw_info_mtl.cpp b/shared/source/xe_hpg_core/hw_info_mtl.cpp index 1fa6c5ce3a..996fd77267 100644 --- a/shared/source/xe_hpg_core/hw_info_mtl.cpp +++ b/shared/source/xe_hpg_core/hw_info_mtl.cpp @@ -53,6 +53,7 @@ const RuntimeCapabilityTable MTL::capabilityTable{ false, // blitterOperationsSupported true, // ftrSupportsInteger64BitAtomics true, // ftrSupportsFP64 + false, // ftrSupportsFP64Emulation true, // ftrSupports64BitMath true, // ftrSvm false, // ftrSupportsCoherency diff --git a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp index e36ff3fcba..b7a6ce5c6b 100644 --- a/shared/test/unit_test/execution_environment/execution_environment_tests.cpp +++ b/shared/test/unit_test/execution_environment/execution_environment_tests.cpp @@ -444,3 +444,15 @@ TEST(ExecutionEnvironment, whenCalculateMaxOsContexCountThenGlobalVariableHasPro EXPECT_EQ(expectedOsContextCount + expectedOsContextCountForCcs, MemoryManager::maxOsContextCount); } } + +TEST(ExecutionEnvironment, givenDefaultExecutionEnvironmentSettingsWhenCheckingFP64EmulationThenFP64EmulationIsDisabled) { + ExecutionEnvironment executionEnvironment{}; + EXPECT_FALSE(executionEnvironment.isFP64EmulationEnabled()); +} + +TEST(ExecutionEnvironment, givenExecutionEnvironmentWhenSettingFP64EmulationEnabledThenFP64EmulationIsEnabled) { + ExecutionEnvironment executionEnvironment{}; + ASSERT_FALSE(executionEnvironment.isFP64EmulationEnabled()); + executionEnvironment.setFP64EmulationEnabled(); + EXPECT_TRUE(executionEnvironment.isFP64EmulationEnabled()); +} diff --git a/shared/test/unit_test/xe_hpg_core/dg2/test_device_caps_dg2.cpp b/shared/test/unit_test/xe_hpg_core/dg2/test_device_caps_dg2.cpp index 59789d8bbf..7a9c602a8b 100644 --- a/shared/test/unit_test/xe_hpg_core/dg2/test_device_caps_dg2.cpp +++ 
b/shared/test/unit_test/xe_hpg_core/dg2/test_device_caps_dg2.cpp @@ -23,6 +23,10 @@ DG2TEST_F(Dg2UsDeviceIdTest, givenDg2ProductWhenCheckFp64SupportThenReturnFalse) EXPECT_FALSE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64); } +DG2TEST_F(Dg2UsDeviceIdTest, givenDg2ProductWhenCheckFp64EmulationSupportThenReturnTrue) { + EXPECT_TRUE(pDevice->getHardwareInfo().capabilityTable.ftrSupportsFP64Emulation); +} + DG2TEST_F(Dg2UsDeviceIdTest, givenEnabledFtrPooledEuA0SteppingAndG10DevIdWhenCalculatingMaxEuPerSSThenDontIgnoreEuCountPerPoolMin) { HardwareInfo myHwInfo = *defaultHwInfo; GT_SYSTEM_INFO &mySysInfo = myHwInfo.gtSystemInfo;