diff --git a/opencl/test/unit_test/xe_hpc_core/pvc/test_cl_device_caps_pvc.cpp b/opencl/test/unit_test/xe_hpc_core/pvc/test_cl_device_caps_pvc.cpp
index 715f5849fe..ed7e92de66 100644
--- a/opencl/test/unit_test/xe_hpc_core/pvc/test_cl_device_caps_pvc.cpp
+++ b/opencl/test/unit_test/xe_hpc_core/pvc/test_cl_device_caps_pvc.cpp
@@ -21,6 +21,7 @@ PVCTEST_F(PvcClDeviceCapsTests, givenPvcProductWhenDeviceCapsInitializedThenAddP
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_create_buffer_with_properties")));
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_local_block_io")));
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate")));
+    EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_matrix_multiply_accumulate_tf32")));
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_khr_subgroup_named_barrier")));
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_extended_block_read")));
     EXPECT_TRUE(hasSubstr(dInfo.deviceExtensions, std::string("cl_intel_subgroup_2d_block_io")));
diff --git a/shared/source/helpers/compiler_product_helper.h b/shared/source/helpers/compiler_product_helper.h
index 82ad3f40fe..80d0bb39d5 100644
--- a/shared/source/helpers/compiler_product_helper.h
+++ b/shared/source/helpers/compiler_product_helper.h
@@ -56,6 +56,8 @@ class CompilerProductHelper {
     virtual bool isForceEmuInt32DivRemSPRequired() const = 0;
     virtual bool isStatelessToStatefulBufferOffsetSupported() const = 0;
     virtual bool isMatrixMultiplyAccumulateSupported(const ReleaseHelper *releaseHelper) const = 0;
+    // True when cl_intel_subgroup_matrix_multiply_accumulate_tf32 should be listed in the device extensions for hwInfo.
+    virtual bool isMatrixMultiplyAccumulateTF32Supported(const HardwareInfo &hwInfo) const = 0;
     virtual bool isSplitMatrixMultiplyAccumulateSupported(const ReleaseHelper *releaseHelper) const = 0;
     virtual bool isBFloat16ConversionSupported(const ReleaseHelper *releaseHelper) const = 0;
     virtual bool isSubgroupLocalBlockIoSupported() const = 0;
@@ -106,6 +108,7 @@ class CompilerProductHelperHw : public CompilerProductHelper {
     bool isForceEmuInt32DivRemSPRequired() const override;
     bool isStatelessToStatefulBufferOffsetSupported() const override;
     bool isMatrixMultiplyAccumulateSupported(const ReleaseHelper *releaseHelper) const override;
+    bool isMatrixMultiplyAccumulateTF32Supported(const HardwareInfo &hwInfo) const override;
     bool isSplitMatrixMultiplyAccumulateSupported(const ReleaseHelper *releaseHelper) const override;
     bool isBFloat16ConversionSupported(const ReleaseHelper *releaseHelper) const override;
     bool isSubgroupLocalBlockIoSupported() const override;
diff --git a/shared/source/helpers/compiler_product_helper_base.inl b/shared/source/helpers/compiler_product_helper_base.inl
index c7fb7ae814..cafc324f81 100644
--- a/shared/source/helpers/compiler_product_helper_base.inl
+++ b/shared/source/helpers/compiler_product_helper_base.inl
@@ -181,6 +181,10 @@ std::string CompilerProductHelperHw<gfxProduct>::getDeviceExtensions(const Hardw
         extensions += "cl_intel_subgroup_matrix_multiply_accumulate ";
     }
 
+    if (isMatrixMultiplyAccumulateTF32Supported(hwInfo)) {
+        extensions += "cl_intel_subgroup_matrix_multiply_accumulate_tf32 ";
+    }
+
     if (isSplitMatrixMultiplyAccumulateSupported(releaseHelper)) {
         extensions += "cl_intel_subgroup_split_matrix_multiply_accumulate ";
     }
diff --git a/shared/source/helpers/compiler_product_helper_before_xe_hpc.inl b/shared/source/helpers/compiler_product_helper_before_xe_hpc.inl
index 8d406a1735..a8419c4ee4 100644
--- a/shared/source/helpers/compiler_product_helper_before_xe_hpc.inl
+++ b/shared/source/helpers/compiler_product_helper_before_xe_hpc.inl
@@ -16,6 +16,12 @@ bool CompilerProductHelperHw<gfxProduct>::isForceToStatelessRequired() const {
     return false;
 }
 
+// Pre-Xe-HPC default: TF32 matrix multiply-accumulate is never reported; hwInfo is ignored.
+template <PRODUCT_FAMILY gfxProduct>
+bool CompilerProductHelperHw<gfxProduct>::isMatrixMultiplyAccumulateTF32Supported(const HardwareInfo &hwInfo) const {
+    return false;
+}
+
 template <PRODUCT_FAMILY gfxProduct>
 bool CompilerProductHelperHw<gfxProduct>::isSubgroupNamedBarrierSupported() const {
     return false;
diff --git a/shared/source/helpers/compiler_product_helper_xe_hpc_and_later.inl b/shared/source/helpers/compiler_product_helper_xe_hpc_and_later.inl
index b797ec4370..56d13db82a 100644
--- a/shared/source/helpers/compiler_product_helper_xe_hpc_and_later.inl
+++ b/shared/source/helpers/compiler_product_helper_xe_hpc_and_later.inl
@@ -20,6 +20,12 @@ bool CompilerProductHelperHw<gfxProduct>::isForceToStatelessRequired() const {
     return true;
 }
 
+// Xe HPC and later default: TF32 matrix multiply-accumulate is reported regardless of hwInfo (PVC specializes this).
+template <PRODUCT_FAMILY gfxProduct>
+bool CompilerProductHelperHw<gfxProduct>::isMatrixMultiplyAccumulateTF32Supported(const HardwareInfo &hwInfo) const {
+    return true;
+}
+
 template <PRODUCT_FAMILY gfxProduct>
 bool CompilerProductHelperHw<gfxProduct>::isSubgroupNamedBarrierSupported() const {
     return true;
diff --git a/shared/source/xe_hpc_core/enable_compiler_product_helper_pvc.cpp b/shared/source/xe_hpc_core/enable_compiler_product_helper_pvc.cpp
index 66f7becc52..e018ec87e2 100644
--- a/shared/source/xe_hpc_core/enable_compiler_product_helper_pvc.cpp
+++ b/shared/source/xe_hpc_core/enable_compiler_product_helper_pvc.cpp
@@ -77,6 +77,14 @@ bool CompilerProductHelperHw<IGFX_PVC>::isMatrixMultiplyAccumulateSupported(cons
     return true;
 }
 
+// PVC reports TF32 matrix multiply-accumulate only for product configs in the
+// half-open range [AOT::PVC_XT_B0, AOT::PVC_XT_C0_VG).
+template <>
+bool CompilerProductHelperHw<IGFX_PVC>::isMatrixMultiplyAccumulateTF32Supported(const HardwareInfo &hwInfo) const {
+    const auto config = getProductConfigFromHwInfo(hwInfo);
+    return config >= AOT::PVC_XT_B0 && config < AOT::PVC_XT_C0_VG;
+}
+
 template <>
 bool CompilerProductHelperHw<IGFX_PVC>::isBFloat16ConversionSupported(const ReleaseHelper *releaseHelper) const {
     return true;
diff --git a/shared/test/unit_test/helpers/compiler_product_helper_tests.cpp b/shared/test/unit_test/helpers/compiler_product_helper_tests.cpp
index 82806d13f1..124a6fc725 100644
--- a/shared/test/unit_test/helpers/compiler_product_helper_tests.cpp
+++ b/shared/test/unit_test/helpers/compiler_product_helper_tests.cpp
@@ -168,6 +168,20 @@ HWTEST2_F(CompilerProductHelperFixture, GivenReleaseHelperThenMatrixMultiplyAccu
     }
 }
 
+HWTEST2_F(CompilerProductHelperFixture, GivenXeHpcAndLaterThenMatrixMultiplyAccumulateTF32IsSupported, IsAtLeastXeHpcCore) {
+    auto &compilerProductHelper = pDevice->getCompilerProductHelper();
+    const auto &hwInfo = *defaultHwInfo;
+
+    EXPECT_TRUE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+}
+
+HWTEST2_F(CompilerProductHelperFixture, GivenPreXeHpcThenMatrixMultiplyAccumulateTF32IsNotSupported, IsAtMostXeHpgCore) {
+    auto &compilerProductHelper = pDevice->getCompilerProductHelper();
+    const auto &hwInfo = *defaultHwInfo;
+
+    EXPECT_FALSE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+}
+
 HWTEST2_F(CompilerProductHelperFixture, GivenReleaseHelperThenDotProductAccumulateSystolicIsSupportedBasedOnReleaseHelper, IsNotXeHpcCore) {
     auto &compilerProductHelper = pDevice->getCompilerProductHelper();
     auto releaseHelper = pDevice->getReleaseHelper();
diff --git a/shared/test/unit_test/xe_hpc_core/pvc/compiler_product_helper_tests_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/compiler_product_helper_tests_pvc.cpp
index d6c41b3905..1145d446b0 100644
--- a/shared/test/unit_test/xe_hpc_core/pvc/compiler_product_helper_tests_pvc.cpp
+++ b/shared/test/unit_test/xe_hpc_core/pvc/compiler_product_helper_tests_pvc.cpp
@@ -1,12 +1,14 @@
 /*
- * Copyright (C) 2023 Intel Corporation
+ * Copyright (C) 2023-2024 Intel Corporation
  *
  * SPDX-License-Identifier: MIT
  *
  */
 
 #include "shared/source/helpers/compiler_product_helper.h"
+#include "shared/source/os_interface/product_helper.h"
 #include "shared/source/xe_hpc_core/hw_cmds_pvc.h"
+#include "shared/source/xe_hpc_core/pvc/device_ids_configs_pvc.h"
 #include "shared/test/common/mocks/mock_execution_environment.h"
 #include "shared/test/common/test_macros/header/per_product_test_definitions.h"
 #include "shared/test/common/test_macros/test.h"
@@ -42,3 +44,44 @@ PVCTEST_F(CompilerProductHelperPvcTest, givenPvcWhenFailBuildProgramWithStateful
     auto &compilerProductHelper = rootDeviceEnvironment.getHelper<CompilerProductHelper>();
     EXPECT_FALSE(compilerProductHelper.failBuildProgramWithStatefulAccessPreference());
 }
+
+PVCTEST_F(CompilerProductHelperPvcTest, givenPvcB0AndLaterThenMatrixMultiplyAccumulateTF32IsSupported) {
+    MockExecutionEnvironment executionEnvironment{};
+    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];
+    auto &compilerProductHelper = rootDeviceEnvironment.getHelper<CompilerProductHelper>();
+    auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
+
+    auto hwInfo = *rootDeviceEnvironment.getHardwareInfo();
+
+    // PVC XL device ids: not supported at steppings A0 and B.
+    for (auto revision : {REVISION_A0, REVISION_B}) {
+        for (auto deviceId : pvcXlDeviceIds) {
+            hwInfo.platform.usDeviceID = deviceId;
+            hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
+            EXPECT_FALSE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+        }
+    }
+
+    // PVC XT device ids: not supported at stepping A0.
+    for (auto deviceId : pvcXtDeviceIds) {
+        hwInfo.platform.usDeviceID = deviceId;
+        hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_A0, hwInfo);
+        EXPECT_FALSE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+    }
+
+    // PVC XT device ids: supported at steppings B and C.
+    for (auto revision : {REVISION_B, REVISION_C}) {
+        for (auto deviceId : pvcXtDeviceIds) {
+            hwInfo.platform.usDeviceID = deviceId;
+            hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(revision, hwInfo);
+            EXPECT_TRUE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+        }
+    }
+
+    // PVC XT VG device ids: not supported at stepping C.
+    for (auto deviceId : pvcXtVgDeviceIds) {
+        hwInfo.platform.usDeviceID = deviceId;
+        hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_C, hwInfo);
+        EXPECT_FALSE(compilerProductHelper.isMatrixMultiplyAccumulateTF32Supported(hwInfo));
+    }
+}