igc_opt returns non-zero on failed assert
This commit is contained in:
Kwasniewski, Patryk
2025-08-22 07:08:07 +00:00
committed by igcbot
parent 46c11fc759
commit b717c7c181
6 changed files with 1153 additions and 126 deletions

View File

@ -107,6 +107,8 @@ private:
/// ///
static const StringRef SG_PREFIX_IDPAS16; static const StringRef SG_PREFIX_IDPAS16;
static const StringRef SG_PREFIX_FDPAS16; static const StringRef SG_PREFIX_FDPAS16;
static const StringRef SG_PREFIX_IDPAS32N16;
static const StringRef SG_PREFIX_FDPAS32N16;
// PVC+: pure hf/bf dpas builtins // PVC+: pure hf/bf dpas builtins
static const StringRef WI_PREFIX_HFDPAS; static const StringRef WI_PREFIX_HFDPAS;
static const StringRef WI_PREFIX_BFDPAS; static const StringRef WI_PREFIX_BFDPAS;
@ -200,6 +202,8 @@ const StringRef DpasFuncsResolution::WI_PREFIX_IDPAS = "__builtin_IB_idpas";
const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = "__builtin_IB_fdpas"; const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = "__builtin_IB_fdpas";
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = "__builtin_IB_sub_group16_idpas"; const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = "__builtin_IB_sub_group16_idpas";
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = "__builtin_IB_sub_group16_fdpas"; const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = "__builtin_IB_sub_group16_fdpas";
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS32N16 = "__builtin_IB_sub_group32n16_idpas";
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS32N16 = "__builtin_IB_sub_group32n16_fdpas";
// PVC+: pure hf/bf dpas builtins // PVC+: pure hf/bf dpas builtins
const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = "__builtin_IB_hfdpas"; const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = "__builtin_IB_hfdpas";
const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = "__builtin_IB_bfdpas"; const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = "__builtin_IB_bfdpas";
@ -263,6 +267,11 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
bool IsDpasw = false; bool IsDpasw = false;
bool IsIDpas = false; bool IsIDpas = false;
// Dimension N is platform-specific and is directly tied to the minimum subgroup size of a
// given platform. If a DPAS with the same M, N, K dimensions is executed within a subgroup
// twice the minimum subgroup size, each work item holds half of the data it would hold
// at the minimum subgroup size.
bool IsDoubleSubgroup = false;
int DstTy, AccTy, PA, PB, SD, RC; int DstTy, AccTy, PA, PB, SD, RC;
GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic; GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic;
bool doVerify = false; bool doVerify = false;
@ -277,12 +286,26 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr)) if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
return; return;
iid = GenISAIntrinsic::GenISA_sub_group_dpas; iid = GenISAIntrinsic::GenISA_sub_group_dpas;
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_IDPAS32N16)) {
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_IDPAS32N16.size();
IsIDpas = true;
IsDoubleSubgroup = true;
if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
return;
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS16)) { } else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS16)) {
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size(); const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size();
IsIDpas = false; IsIDpas = false;
if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr)) if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
return; return;
iid = GenISAIntrinsic::GenISA_sub_group_dpas; iid = GenISAIntrinsic::GenISA_sub_group_dpas;
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS32N16)) {
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS32N16.size();
IsIDpas = false;
IsDoubleSubgroup = true;
if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
return;
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
} }
else { else {
return; return;
@ -363,6 +386,14 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
Type *A_BaseTy = ATy->getScalarType(); Type *A_BaseTy = ATy->getScalarType();
Type *B_BaseTy = BTy->getScalarType(); Type *B_BaseTy = BTy->getScalarType();
if (IsDoubleSubgroup) {
IGC_ASSERT_MESSAGE(RC >= 2, "ICE: repeat count of DPAS for double subgroup-size must be >= 2!");
D_nelts *= 2;
ACC_nelts *= 2;
A_nelts *= 2;
B_nelts *= 2;
}
if (IsIDpas) { if (IsIDpas) {
uint32_t Abits = getPrecisionInBits((PrecisionType)PA); uint32_t Abits = getPrecisionInBits((PrecisionType)PA);
uint32_t Bbits = getPrecisionInBits((PrecisionType)PB); uint32_t Bbits = getPrecisionInBits((PrecisionType)PB);

View File

@ -272,7 +272,7 @@ int SpvSubgroupMMAResolution::getElemCount(const Type *Ty) const {
} }
bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands, bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands,
const CallInst &CI) { CallInst &CI) {
if (M != 1 && M != 2 && M != 4 && M != 8) { if (M != 1 && M != 2 && M != 4 && M != 8) {
emitError( emitError(
"__spirv_SubgroupMatrixMultiplyAccumulateINTEL: M dimension must be 1, 2, 4 or 8 for targeted HW. Actual: " + "__spirv_SubgroupMatrixMultiplyAccumulateINTEL: M dimension must be 1, 2, 4 or 8 for targeted HW. Actual: " +
@ -295,16 +295,27 @@ bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BEl
CI); CI);
return false; return false;
} }
if (BElemCount != 8) { const int expectedBCount = isDoubleSubgroup(CI) ? 4 : 8;
emitError("__spirv_SubgroupMatrixMultiplyAccumulateINTEL: Matrix B argument must have 8 components for targeted " if (BElemCount != expectedBCount) {
"HW. Actual: " + emitError("__spirv_SubgroupMatrixMultiplyAccumulateINTEL: Matrix B argument must have " +
std::to_string(BElemCount), std::to_string(expectedBCount) +
" components for targeted HW. Actual: " + std::to_string(BElemCount),
CI); CI);
return false; return false;
} }
return true; return true;
} }
// The N dimension of DPAS is fixed per platform and tied to that platform's minimum
// subgroup size. When a DPAS with the same M, N, K dimensions runs in a subgroup twice
// that size, every work item carries half as much data as it would at the minimum size.
//
// Returns true only when the platform reports hasExecSize16DPAS() and IGC::getSIMDSize()
// yields 32 for the function that contains CI; returns false otherwise.
bool SpvSubgroupMMAResolution::isDoubleSubgroup(CallInst &CI) {
  if (m_Ctx->platform.hasExecSize16DPAS()) {
    // The SIMD size is looked up per function, using the metadata utilities analysis.
    auto *MDUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
    auto *ParentFunc = CI.getParent()->getParent();
    const bool IsSimd32 = IGC::getSIMDSize(MDUtils, ParentFunc) == 32;
    return IsSimd32;
  }
  return false;
}
SpvSubgroupMMAResolution::SupportedTable *SpvSubgroupMMAResolution::getSupportedTable() { SpvSubgroupMMAResolution::SupportedTable *SpvSubgroupMMAResolution::getSupportedTable() {
if (m_Ctx->platform.hasExecSize16DPAS()) { if (m_Ctx->platform.hasExecSize16DPAS()) {
if (m_Simd16Table.empty()) if (m_Simd16Table.empty())
@ -480,9 +491,16 @@ void SpvSubgroupMMAResolution::visitCallInst(CallInst &CI) {
SmallVector<Type *, 3> argTypes({c->getType(), a->getType(), b->getType()}); SmallVector<Type *, 3> argTypes({c->getType(), a->getType(), b->getType()});
FunctionType *FT = FunctionType::get(CI.getType(), argTypes, false); FunctionType *FT = FunctionType::get(CI.getType(), argTypes, false);
std::string subgroupSize;
if (isDoubleSubgroup(CI)) {
subgroupSize = "32n16";
M *= 2;
} else {
subgroupSize = m_Ctx->platform.hasExecSize16DPAS() ? "16" : "";
}
std::stringstream newFuncName; std::stringstream newFuncName;
newFuncName << "__builtin_IB_sub_group"; newFuncName << "__builtin_IB_sub_group" << subgroupSize;
newFuncName << (m_Ctx->platform.hasExecSize16DPAS() ? "16" : "");
newFuncName << "_" << (ResultElemTy == I32 ? "i" : "f"); newFuncName << "_" << (ResultElemTy == I32 ? "i" : "f");
newFuncName << "dpas_" << OperandsIt->second.str() << "8_" << M; newFuncName << "dpas_" << OperandsIt->second.str() << "8_" << M;

View File

@ -72,7 +72,7 @@ private:
bool validateI32Constant(const llvm::Value *V, const llvm::Twine &ParamName, const llvm::CallInst &CI); bool validateI32Constant(const llvm::Value *V, const llvm::Twine &ParamName, const llvm::CallInst &CI);
bool validateCType(const llvm::Type *ResultTy, const llvm::Type *CType, const llvm::CallInst &CI); bool validateCType(const llvm::Type *ResultTy, const llvm::Type *CType, const llvm::CallInst &CI);
bool validateElementType(const ElType ElemTy, llvm::StringRef ParamName, const llvm::CallInst &CI); bool validateElementType(const ElType ElemTy, llvm::StringRef ParamName, const llvm::CallInst &CI);
bool validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands, const llvm::CallInst &CI); bool validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands, llvm::CallInst &CI);
template <typename T> template <typename T>
bool validateKDimInTable(const T KIt, int K, const SupportedTable *table, const llvm::CallInst &CI); bool validateKDimInTable(const T KIt, int K, const SupportedTable *table, const llvm::CallInst &CI);
@ -89,6 +89,8 @@ private:
bool validateOperands(const T OpIt, int K, ElType ResultElemTy, ElType AElemTy, ElType BElemTy, uint32_t Operands, bool validateOperands(const T OpIt, int K, ElType ResultElemTy, ElType AElemTy, ElType BElemTy, uint32_t Operands,
const OperandsTable &operandMap, const llvm::CallInst &CI); const OperandsTable &operandMap, const llvm::CallInst &CI);
bool isDoubleSubgroup(llvm::CallInst &CI);
llvm::DenseSet<llvm::Function *> m_BuiltinsToRemove; llvm::DenseSet<llvm::Function *> m_BuiltinsToRemove;
bool m_Changed = false; bool m_Changed = false;
IGC::CodeGenContext *m_Ctx = nullptr; IGC::CodeGenContext *m_Ctx = nullptr;

View File

@ -0,0 +1,26 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; UNSUPPORTED: system-windows
; REQUIRES: debug, llvm-14-plus
; RUN: not igc_opt --opaque-pointers -platformpvc --igc-arith-funcs-translation -S < %s 2>&1 | FileCheck %s
; ------------------------------------------------
; DpasFuncsResolution
; ------------------------------------------------
; Check assertion unique to DPAS in double-subgroup size.
; CHECK: RC >= 2, ICE: repeat count of DPAS for double subgroup-size must be >= 2!
; Negative test kernel: issues a single call to a "sub_group32n16" integer DPAS builtin.
; The builtin name ends in "_8_1"; the trailing 1 is presumably the repeat count parsed
; from the name suffix, which is below the minimum of 2 that the pass asserts for the
; double subgroup-size variants, so the tool is expected to fail on this input.
; The operands are the i32 accumulator, an i16 value loaded from %dst, and the <4 x i32> %src.
define spir_kernel void @test_dpas(<4 x i32> %src, i32 %src2, ptr %dst) {
%1 = load i16, ptr %dst, align 4
%2 = call i32 @__builtin_IB_sub_group32n16_idpas_s8_s8_8_1(i32 %src2, i16 %1, <4 x i32> %src)
store i32 %2, ptr %dst, align 4
ret void
}
declare i32 @__builtin_IB_sub_group32n16_idpas_s8_s8_8_1(i32, i16, <4 x i32>)

View File

@ -10,7 +10,8 @@
; RUN: llvm-as %s -o %t.bc ; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv ; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s ; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s --check-prefix=CHECK-GENISA
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'DumpVISAASMToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-VISAASM
target triple = "spir64-unknown-unknown" target triple = "spir64-unknown-unknown"
@ -45,15 +46,41 @@ define spir_kernel void @test_v1(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v1( ; CHECK-GENISA-LABEL: @test_v1(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 4, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 4, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v1"
; CHECK-VISAASM-DAG: dpas.s8.s8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.s8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call0 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 51) %call0 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 51)
store i32 %call0, i32* %res1I32 store i32 %call0, i32* %res1I32
@ -72,15 +99,41 @@ define spir_kernel void @test_v2(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v2( ; CHECK-GENISA-LABEL: @test_v2(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 1, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 1, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v2"
; CHECK-VISAASM-DAG: dpas.u8.s8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.u8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call4 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 49) %call4 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 49)
store i32 %call4, i32* %res1I32 store i32 %call4, i32* %res1I32
@ -99,15 +152,41 @@ define spir_kernel void @test_v3(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v3( ; CHECK-GENISA-LABEL: @test_v3(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 4, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 4, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v3"
; CHECK-VISAASM-DAG: dpas.s8.u8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.s8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call8 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 50) %call8 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 50)
store i32 %call8, i32* %res1I32 store i32 %call8, i32* %res1I32
@ -126,15 +205,41 @@ define spir_kernel void @test_v4(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v4( ; CHECK-GENISA-LABEL: @test_v4(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 1, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 1, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v4"
; CHECK-VISAASM-DAG: dpas.u8.u8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.u8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call12 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 48) %call12 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 48)
store i32 %call12, i32* %res1I32 store i32 %call12, i32* %res1I32
@ -154,15 +259,41 @@ define spir_kernel void @test_v5(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v5( ; CHECK-GENISA-LABEL: @test_v5(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 5, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 5, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v5"
; CHECK-VISAASM-DAG: dpas.s4.s4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.s4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call16 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 195) %call16 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 195)
store i32 %call16, i32* %res1I32 store i32 %call16, i32* %res1I32
@ -181,15 +312,41 @@ define spir_kernel void @test_v6(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v6( ; CHECK-GENISA-LABEL: @test_v6(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 2, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 2, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v6"
; CHECK-VISAASM-DAG: dpas.u4.s4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.u4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call20 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 193) %call20 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 193)
store i32 %call20, i32* %res1I32 store i32 %call20, i32* %res1I32
@ -208,15 +365,41 @@ define spir_kernel void @test_v7(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v7( ; CHECK-GENISA-LABEL: @test_v7(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 5, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 5, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v7"
; CHECK-VISAASM-DAG: dpas.s4.u4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.s4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call24 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 194) %call24 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 194)
store i32 %call24, i32* %res1I32 store i32 %call24, i32* %res1I32
@ -230,20 +413,47 @@ entry:
ret void ret void
} }
; int4 matrix sources, int32 accumulator:
define spir_kernel void @test_v8(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>* %res4I32, <8 x i32>* %res8I32, define spir_kernel void @test_v8(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>* %res4I32, <8 x i32>* %res8I32,
i16 %a1, <2 x i16> %a2, <4 x i16> %a4, <8 x i16> %a8, i16 %a1, <2 x i16> %a2, <4 x i16> %a4, <8 x i16> %a8,
<8 x i32> %b, <8 x i32> %b,
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 { i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v8( ; CHECK-GENISA-LABEL: @test_v8(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 2, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 2, i32 8, i32 1, i1 false)
; CHECK: store i32 [[DPAS]], i32* %res1I32 ; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false)
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32 ; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false)
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32 ; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false)
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32 ; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v8"
; CHECK-VISAASM-DAG: dpas.u4.u4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.u4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call28 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 192) %call28 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 192)
store i32 %call28, i32* %res1I32 store i32 %call28, i32* %res1I32
@ -263,15 +473,41 @@ define spir_kernel void @test_v9(float* %resF, <2 x float>* %res2, <4 x float>*
<8 x i32> %b, <8 x i32> %b,
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 { float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v9( ; CHECK-GENISA-LABEL: @test_v9(
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
; CHECK: store float [[DPAS]], float* %resF ; CHECK-GENISA: store float [[DPAS]], float* %resF
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2 ; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4 ; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8 ; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
; CHECK-VISAASM-LABEL: .kernel "test_v9"
; CHECK-VISAASM-DAG: dpas.hf.hf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 3072) %call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 3072)
store float %call32, float* %resF store float %call32, float* %resF
@ -291,15 +527,41 @@ define spir_kernel void @test_v10(float* %resF, <2 x float>* %res2, <4 x float>
<8 x i32> %b, <8 x i32> %b,
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 { float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v10( ; CHECK-GENISA-LABEL: @test_v10(
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
; CHECK: store float [[DPAS]], float* %resF ; CHECK-GENISA: store float [[DPAS]], float* %resF
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2 ; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4 ; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8 ; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
; CHECK-VISAASM-LABEL: .kernel "test_v10"
; CHECK-VISAASM-DAG: dpas.bf.bf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call36 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 12288) %call36 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 12288)
store float %call36, float* %resF store float %call36, float* %resF
@ -319,15 +581,41 @@ define spir_kernel void @test_v11(half* %res, <2 x half>* %res2, <4 x half>* %r
<8 x i32> %b, <8 x i32> %b,
half %c, <2 x half> %c2, <4 x half> %c4, <8 x half> %c8) !intel_reqd_sub_group_size !100 { half %c, <2 x half> %c2, <4 x half> %c4, <8 x half> %c8) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v11( ; CHECK-GENISA-LABEL: @test_v11(
; CHECK: [[DPAS:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v8i32(half %c, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v8i32(half %c, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
; CHECK: store half [[DPAS]], half* %res ; CHECK-GENISA: store half [[DPAS]], half* %res
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v8i32(<2 x half> %c2, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v8i32(<2 x half> %c2, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
; CHECK: store <2 x half> [[DPAS1]], <2 x half>* %res2 ; CHECK-GENISA: store <2 x half> [[DPAS1]], <2 x half>* %res2
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v8i32(<4 x half> %c4, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v8i32(<4 x half> %c4, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
; CHECK: store <4 x half> [[DPAS2]], <4 x half>* %res4 ; CHECK-GENISA: store <4 x half> [[DPAS2]], <4 x half>* %res4
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f16.v8f16.v8i16.v8i32(<8 x half> %c8, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f16.v8f16.v8i16.v8i32(<8 x half> %c8, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
; CHECK: store <8 x half> [[DPAS3]], <8 x half>* %res8 ; CHECK-GENISA: store <8 x half> [[DPAS3]], <8 x half>* %res8
; CHECK-VISAASM-LABEL: .kernel "test_v11"
; CHECK-VISAASM-DAG: dpas.hf.hf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=hf num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=hf num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=hf num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=hf num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=hf num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=hf num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=hf num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=hf num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call32 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32 16, i16 %a1, <8 x i32> %b, half %c, i32 3072) %call32 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32 16, i16 %a1, <8 x i32> %b, half %c, i32 3072)
store half %call32, half* %res store half %call32, half* %res
@ -347,15 +635,41 @@ define spir_kernel void @test_v12(i16* %res, <2 x i16>* %res2, <4 x i16>* %res4,
<8 x i32> %b, <8 x i32> %b,
i16 %cF, <2 x i16> %c2F, <4 x i16> %c4F, <8 x i16> %c8F) !intel_reqd_sub_group_size !100 { i16 %cF, <2 x i16> %c2F, <4 x i16> %c4F, <8 x i16> %c8F) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v12( ; CHECK-GENISA-LABEL: @test_v12(
; CHECK: [[DPAS:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v8i32(i16 %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v8i32(i16 %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
; CHECK: store i16 [[DPAS]], i16* %res ; CHECK-GENISA: store i16 [[DPAS]], i16* %res
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v8i32(<2 x i16> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v8i32(<2 x i16> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
; CHECK: store <2 x i16> [[DPAS1]], <2 x i16>* %res2 ; CHECK-GENISA: store <2 x i16> [[DPAS1]], <2 x i16>* %res2
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v8i32(<4 x i16> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v8i32(<4 x i16> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
; CHECK: store <4 x i16> [[DPAS2]], <4 x i16>* %res4 ; CHECK-GENISA: store <4 x i16> [[DPAS2]], <4 x i16>* %res4
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i16> @llvm.genx.GenISA.sub.group.dpas.v8i16.v8i16.v8i16.v8i32(<8 x i16> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i16> @llvm.genx.GenISA.sub.group.dpas.v8i16.v8i16.v8i16.v8i32(<8 x i16> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
; CHECK: store <8 x i16> [[DPAS3]], <8 x i16>* %res8 ; CHECK-GENISA: store <8 x i16> [[DPAS3]], <8 x i16>* %res8
; CHECK-VISAASM-LABEL: .kernel "test_v12"
; CHECK-VISAASM-DAG: dpas.bf.bf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=bf num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=bf num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=bf num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=bf num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=bf num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=bf num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=bf num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=bf num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call36 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32 16, i16 %a1, <8 x i32> %b, i16 %cF, i32 12300) %call36 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32 16, i16 %a1, <8 x i32> %b, i16 %cF, i32 12300)
store i16 %call36, i16* %res store i16 %call36, i16* %res
@ -375,15 +689,41 @@ define spir_kernel void @test_v13(float* %resF, <2 x float>* %res2, <4 x float>
<8 x float> %b, <8 x float> %b,
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 { float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry: entry:
; CHECK-LABEL: @test_v13( ; CHECK-GENISA-LABEL: @test_v13(
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v8i32(float %cF, float %a1, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 1, i1 false) ; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v8i32(float %cF, float %a1, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 1, i1 false)
; CHECK: store float [[DPAS]], float* %resF ; CHECK-GENISA: store float [[DPAS]], float* %resF
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v8i32(<2 x float> %c2F, float %a2, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false) ; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v8i32(<2 x float> %c2F, float %a2, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false)
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2 ; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v8i32(<4 x float> %c4F, <2 x float> %a4, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false) ; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v8i32(<4 x float> %c4F, <2 x float> %a4, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false)
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4 ; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v4f32.v8i32(<8 x float> %c8F, <4 x float> %a8, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false) ; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v4f32.v8i32(<8 x float> %c8F, <4 x float> %a8, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false)
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8 ; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
; CHECK-VISAASM-LABEL: .kernel "test_v13"
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=f num_elts=16
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=f num_elts=64
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32 8, float %a1, <8 x float> %b, float %cF, i32 768) %call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32 8, float %a1, <8 x float> %b, float %cF, i32 768)
store float %call32, float* %resF store float %call32, float* %resF

View File

@ -0,0 +1,610 @@
;=========================== begin_copyright_notice ============================
;
; Copyright (C) 2025 Intel Corporation
;
; SPDX-License-Identifier: MIT
;
;============================ end_copyright_notice =============================
; REQUIRES: llvm-spirv, pvc-supported
; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s --check-prefix=CHECK-GENISA
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'DumpVISAASMToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-VISAASM
; Tests DPAS called from subgroup-size=32 kernels.
; Supported dimensions M, N, K are exactly the same and don't change with subgroup-size.
; To correctly map 32 work-items, each work item contains half of the data compared to subgroup-size=16.
;
; This test is a copy of the SIMD16 test (dpas_pvc.ll) with modified GenISA checks. The vISA ASM checks are exactly the same.
target triple = "spir64-unknown-unknown"
declare spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32, i16 signext, <4 x i32>, i32, i32)
declare spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32, <2 x i16>, <4 x i32>, <2 x i32>, i32)
declare spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32, <4 x i16>, <4 x i32>, <4 x i32>, i32)
declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32, i16 signext, <4 x i32>, float, i32)
declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32, <2 x i16>, <4 x i32>, <2 x float>, i32)
declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32, <4 x i16>, <4 x i32>, <4 x float>, i32)
declare spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iDhi(i32, i16 signext, <4 x i32>, half, i32)
declare spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_Dhi(i32, <2 x i16>, <4 x i32>, <2 x half>, i32)
declare spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_Dhi(i32, <4 x i16>, <4 x i32>, <4 x half>, i32)
declare spir_func signext i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_isi(i32, i16 signext, <4 x i32>, i16 signext, i32)
declare spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iS_i(i32, <2 x i16>, <4 x i32>, <2 x i16>, i32)
declare spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS_i(i32, <4 x i16>, <4 x i32>, <4 x i16>, i32)
declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv4_ffi(i32, float, <4 x float>, float, i32)
declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv4_fS_i(i32, float, <4 x float>, <2 x float>, i32)
declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fS_S_i(i32, <2 x float>, <4 x float>, <4 x float>, i32)
; 8-bit integer matrix sources (signed and unsigned), 32-bit integer accumulator:
; test_v1: three SubgroupMatrixMultiplyAccumulateINTEL calls with first argument 32 and
; operands mask 51, using i16, <2 x i16> and <4 x i16> A operands and a <4 x i32> B operand.
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (4, 4), followed by 8 and
;     a final count of 2, 4 and 8 respectively (these appear to be systolic depth and
;     repeat count, matching the ".8.<N>" suffix of the vISA mnemonic);
;   * vISA dpas.s8.s8.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 51 = 0x33 presumably means "A signed | B signed | A and B packed int8" in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v1(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v1(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v1"
; CHECK-VISAASM-DAG: dpas.s8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call1 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 51)
store i32 %call1, i32* %res2I32
%call2 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 51)
store <2 x i32> %call2, <2 x i32>* %res4I32
%call3 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 51)
store <4 x i32> %call3, <4 x i32>* %res8I32
ret void
}
; test_v2: same call shapes as the signed/signed 8-bit case, but with operands mask 49
; (first argument still 32).
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (4, 1), followed by 8 and
;     a final count of 2, 4 and 8 respectively;
;   * vISA dpas.u8.s8.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 49 = 0x31 presumably means "A signed | A and B packed int8" (B unsigned) in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v2(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v2(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v2"
; CHECK-VISAASM-DAG: dpas.u8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call5 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 49)
store i32 %call5, i32* %res2I32
%call6 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 49)
store <2 x i32> %call6, <2 x i32>* %res4I32
%call7 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 49)
store <4 x i32> %call7, <4 x i32>* %res8I32
ret void
}
; test_v3: three SubgroupMatrixMultiplyAccumulateINTEL calls with first argument 32 and
; operands mask 50, using i16, <2 x i16> and <4 x i16> A operands and a <4 x i32> B operand.
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (1, 4), followed by 8 and
;     a final count of 2, 4 and 8 respectively;
;   * vISA dpas.s8.u8.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 50 = 0x32 presumably means "B signed | A and B packed int8" (A unsigned) in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v3(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v3(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; The vISA label line below anchors the following DAG matches to this kernel's section of
; the vISA asm dump, so they cannot be satisfied by output belonging to another kernel.
; CHECK-VISAASM-LABEL: .kernel "test_v3"
; CHECK-VISAASM-DAG: dpas.s8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call9 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 50)
store i32 %call9, i32* %res2I32
%call10 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 50)
store <2 x i32> %call10, <2 x i32>* %res4I32
%call11 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 50)
store <4 x i32> %call11, <4 x i32>* %res8I32
ret void
}
; test_v4: three SubgroupMatrixMultiplyAccumulateINTEL calls with first argument 32 and
; operands mask 48, using i16, <2 x i16> and <4 x i16> A operands and a <4 x i32> B operand.
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (1, 1), followed by 8 and
;     a final count of 2, 4 and 8 respectively;
;   * vISA dpas.u8.u8.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 48 = 0x30 presumably means "A and B packed int8" with both unsigned in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v4(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v4(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v4"
; CHECK-VISAASM-DAG: dpas.u8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call13 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 48)
store i32 %call13, i32* %res2I32
%call14 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 48)
store <2 x i32> %call14, <2 x i32>* %res4I32
%call15 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 48)
store <4 x i32> %call15, <4 x i32>* %res8I32
ret void
}
; 4-bit integer matrix sources (signed and unsigned), 32-bit integer accumulator:
; test_v5: three SubgroupMatrixMultiplyAccumulateINTEL calls with first argument 64 and
; operands mask 195, using i16, <2 x i16> and <4 x i16> A operands and a <4 x i32> B operand.
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (5, 5), followed by 8 and
;     a final count of 2, 4 and 8 respectively;
;   * vISA dpas.s4.s4.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 195 = 0xC3 presumably means "A signed | B signed | A and B packed int4" in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v5(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v5(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v5"
; CHECK-VISAASM-DAG: dpas.s4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call17 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 195)
store i32 %call17, i32* %res2I32
%call18 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 195)
store <2 x i32> %call18, <2 x i32>* %res4I32
%call19 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 195)
store <4 x i32> %call19, <4 x i32>* %res8I32
ret void
}
; test_v6: three SubgroupMatrixMultiplyAccumulateINTEL calls with first argument 64 and
; operands mask 193, using i16, <2 x i16> and <4 x i16> A operands and a <4 x i32> B operand.
; Expected lowering, as pinned by the directives below:
;   * GenISA sub.group.dpas calls whose precision arguments are (5, 2), followed by 8 and
;     a final count of 2, 4 and 8 respectively;
;   * vISA dpas.u4.s4.8.<N> instructions plus the .decl lines of their four operands.
; NOTE(review): 193 = 0xC1 presumably means "A signed | A and B packed int4" (B unsigned) in
; SPV_INTEL_subgroup_matrix_multiply_accumulate - confirm against the extension spec.
; %res1I32, %a1 and %c1I32 are not referenced in the body.
; !100 is defined outside this function (presumably sub-group size 32, per the file header).
define spir_kernel void @test_v6(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; CHECK-GENISA-LABEL: @test_v6(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; CHECK-VISAASM-LABEL: .kernel "test_v6"
; CHECK-VISAASM-DAG: dpas.u4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
%call21 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 193)
store i32 %call21, i32* %res2I32
%call22 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 193)
store <2 x i32> %call22, <2 x i32>* %res4I32
%call23 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 193)
store <4 x i32> %call23, <4 x i32>* %res8I32
ret void
}
; int4 matrix sources, int32 accumulator, operands = 194:
; Kernel test_v7 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 64, last argument 194). A, the accumulator and the
; result are scalar, <2 x ...> and <4 x ...> respectively; every call shares
; the same <4 x i32> %b.
; NOTE(review): 194 = 0x80 | 0x40 | 0x02, presumably MatrixBPackedInt4INTEL |
; MatrixAPackedInt4INTEL | MatrixBSignedComponentsINTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %res1I32, %a1 and %c1I32 are declared but never referenced in the body.
define spir_kernel void @test_v7(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: each builtin call becomes one GenISA sub-group dpas call whose
; result is stored to the matching pointer. The trailing constants are
; (2, 5, 8, N) with N = 2, 4, 8 -- presumably (precision A, precision B,
; systolic depth, repeat count); TODO confirm against the intrinsic definition.
; CHECK-GENISA-LABEL: @test_v7(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; Expected vISA: dpas.s4.u4.8.N at execution size 16, together with the
; declarations of its operands (destination and accumulator are 16*N dwords,
; B is 128 dwords, A is a ud variable aliased to a w variable).
; CHECK-VISAASM-LABEL: .kernel "test_v7"
; CHECK-VISAASM-DAG: dpas.s4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.s4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.s4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call25 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 194)
store i32 %call25, i32* %res2I32
%call26 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 194)
store <2 x i32> %call26, <2 x i32>* %res4I32
%call27 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 194)
store <4 x i32> %call27, <4 x i32>* %res8I32
ret void
}
; int4 matrix sources, int32 accumulator, operands = 192:
; Kernel test_v8 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 64, last argument 192). The accumulator and result are
; i32 / <2 x i32> / <4 x i32>; every call shares the same <4 x i32> %b.
; NOTE(review): 192 = 0x80 | 0x40, presumably MatrixBPackedInt4INTEL |
; MatrixAPackedInt4INTEL with no signedness bits -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %res1I32, %a1 and %c1I32 are declared but never referenced in the body.
define spir_kernel void @test_v8(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call per builtin call, with trailing
; constants (2, 2, 8, N), N = 2, 4, 8 -- presumably (precision A, precision B,
; systolic depth, repeat count); TODO confirm against the intrinsic definition.
; CHECK-GENISA-LABEL: @test_v8(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
; Expected vISA: dpas.u4.u4.8.N at execution size 16, together with the
; declarations of its operands (destination and accumulator are 16*N dwords,
; B is 128 dwords, A is a ud variable aliased to a w variable).
; CHECK-VISAASM-LABEL: .kernel "test_v8"
; CHECK-VISAASM-DAG: dpas.u4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.u4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.u4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call29 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 192)
store i32 %call29, i32* %res2I32
%call30 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 192)
store <2 x i32> %call30, <2 x i32>* %res4I32
%call31 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 192)
store <4 x i32> %call31, <4 x i32>* %res8I32
ret void
}
; fp16 matrix sources, fp32 accumulator, operands = 3072:
; Kernel test_v9 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 16, last argument 3072). A is passed as i16 vectors,
; B as <4 x i32>, and the accumulator/result are float / <2 x float> /
; <4 x float>.
; NOTE(review): 3072 = 0x800 | 0x400, presumably MatrixBPackedFloat16INTEL |
; MatrixAPackedFloat16INTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %resF, %a1 and %cF are declared but never referenced in the body.
define spir_kernel void @test_v9(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call per builtin call, with trailing
; constants (12, 12, 8, N), N = 2, 4, 8 -- presumably (precision A,
; precision B, systolic depth, repeat count); TODO confirm.
; CHECK-GENISA-LABEL: @test_v9(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v4i32(float %c2F, i16 %a2, <4 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
; CHECK-GENISA: store float [[DPAS1]], float* %res2
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v4i32(<2 x float> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
; Expected vISA: dpas.hf.hf.8.N at execution size 16; destination and
; accumulator are declared with type=f (16*N elements), B as 128 dwords, and
; A as a ud variable aliased to a w variable.
; CHECK-VISAASM-LABEL: .kernel "test_v9"
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call33 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32 16, i16 %a2, <4 x i32> %b, float %c2F, i32 3072)
store float %call33, float* %res2
%call34 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x float> %c4F, i32 3072)
store <2 x float> %call34, <2 x float>* %res4
%call35 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x float> %c8F, i32 3072)
store <4 x float> %call35, <4 x float>* %res8
ret void
}
; bf16 matrix sources, fp32 accumulator, operands = 12288:
; Kernel test_v10 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 16, last argument 12288). A is passed as i16 vectors,
; B as <4 x i32>, and the accumulator/result are float / <2 x float> /
; <4 x float>.
; NOTE(review): 12288 = 0x2000 | 0x1000, presumably MatrixBPackedBFloat16INTEL |
; MatrixAPackedBFloat16INTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %resF, %a1 and %cF are declared but never referenced in the body.
define spir_kernel void @test_v10(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call per builtin call, with trailing
; constants (11, 11, 8, N), N = 2, 4, 8 -- presumably (precision A,
; precision B, systolic depth, repeat count); TODO confirm.
; CHECK-GENISA-LABEL: @test_v10(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v4i32(float %c2F, i16 %a2, <4 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
; CHECK-GENISA: store float [[DPAS1]], float* %res2
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v4i32(<2 x float> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
; Expected vISA: dpas.bf.bf.8.N at execution size 16; destination and
; accumulator are declared with type=f (16*N elements), B as 128 dwords, and
; A as a ud variable aliased to a w variable.
; CHECK-VISAASM-LABEL: .kernel "test_v10"
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call37 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32 16, i16 %a2, <4 x i32> %b, float %c2F, i32 12288)
store float %call37, float* %res2
%call38 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x float> %c4F, i32 12288)
store <2 x float> %call38, <2 x float>* %res4
%call39 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x float> %c8F, i32 12288)
store <4 x float> %call39, <4 x float>* %res8
ret void
}
; fp16 matrix sources, fp16 accumulator, operands = 3072:
; Kernel test_v11 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 16, last argument 3072). A is passed as i16 vectors,
; B as <4 x i32>, and the accumulator/result are half / <2 x half> /
; <4 x half>, so only the accumulator/result IR type distinguishes these calls
; from the float-accumulator form.
; NOTE(review): 3072 = 0x800 | 0x400, presumably MatrixBPackedFloat16INTEL |
; MatrixAPackedFloat16INTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %res, %a1 and %c are declared but never referenced in the body.
define spir_kernel void @test_v11(half* %res, half* %res2, <2 x half>* %res4, <4 x half>* %res8,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
half %c, half %c2, <2 x half> %c4, <4 x half> %c8) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call (f16 overloads) per builtin call,
; with trailing constants (12, 12, 8, N), N = 2, 4, 8 -- presumably
; (precision A, precision B, systolic depth, repeat count); TODO confirm.
; CHECK-GENISA-LABEL: @test_v11(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v4i32(half %c2, i16 %a2, <4 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
; CHECK-GENISA: store half [[DPAS1]], half* %res2
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v4i32(<2 x half> %c4, <2 x i16> %a4, <4 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x half> [[DPAS2]], <2 x half>* %res4
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v4i32(<4 x half> %c8, <4 x i16> %a8, <4 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x half> [[DPAS3]], <4 x half>* %res8
; Expected vISA: dpas.hf.hf.8.N at execution size 16; destination and
; accumulator are declared with type=hf (16*N elements), B as 128 dwords, and
; A as a ud variable aliased to a w variable.
; CHECK-VISAASM-LABEL: .kernel "test_v11"
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=hf num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=hf num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=hf num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=hf num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=hf num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=hf num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call33 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iDhi(i32 16, i16 %a2, <4 x i32> %b, half %c2, i32 3072)
store half %call33, half* %res2
%call34 = call spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_Dhi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x half> %c4, i32 3072)
store <2 x half> %call34, <2 x half>* %res4
%call35 = call spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_Dhi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x half> %c8, i32 3072)
store <4 x half> %call35, <4 x half>* %res8
ret void
}
; bf16 matrix sources, bf16 accumulator, operands = 12300:
; Kernel test_v12 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 16, last argument 12300). A, the accumulator and the
; result are all carried as i16 / <2 x i16> / <4 x i16>; B is <4 x i32>.
; NOTE(review): 12300 = 0x2000 | 0x1000 | 0x8 | 0x4, presumably
; MatrixBPackedBFloat16INTEL | MatrixAPackedBFloat16INTEL |
; MatrixResultBFloat16INTEL | MatrixCBFloat16INTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; %res, %a1 and %cF are declared but never referenced in the body.
define spir_kernel void @test_v12(i16* %res, i16* %res2, <2 x i16>* %res4, <4 x i16>* %res8,
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
<4 x i32> %b,
i16 %cF, i16 %c2F, <2 x i16> %c4F, <4 x i16> %c8F) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call (i16 overloads) per builtin call,
; with trailing constants (11, 11, 8, N), N = 2, 4, 8 -- presumably
; (precision A, precision B, systolic depth, repeat count); TODO confirm.
; CHECK-GENISA-LABEL: @test_v12(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v4i32(i16 %c2F, i16 %a2, <4 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
; CHECK-GENISA: store i16 [[DPAS1]], i16* %res2
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v4i32(<2 x i16> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x i16> [[DPAS2]], <2 x i16>* %res4
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v4i32(<4 x i16> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x i16> [[DPAS3]], <4 x i16>* %res8
; Expected vISA: dpas.bf.bf.8.N at execution size 16; destination and
; accumulator are declared with type=bf (16*N elements), B as 128 dwords, and
; A as a ud variable aliased to a w variable.
; CHECK-VISAASM-LABEL: .kernel "test_v12"
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=bf num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=bf num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=bf num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=bf num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=bf num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=bf num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
; Input under test: the three builtin calls matched by the expectations above.
%call37 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_isi(i32 16, i16 %a2, <4 x i32> %b, i16 %c2F, i32 12300)
store i16 %call37, i16* %res2
%call38 = call spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iS_i(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x i16> %c4F, i32 12300)
store <2 x i16> %call38, <2 x i16>* %res4
%call39 = call spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS_i(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x i16> %c8F, i32 12300)
store <4 x i16> %call39, <4 x i16>* %res8
ret void
}
; tf32 matrix sources, fp32 accumulator, operands = 768:
; Kernel test_v13 issues three __spirv_SubgroupMatrixMultiplyAccumulateINTEL
; calls (first argument 8, last argument 768). A is float / float /
; <2 x float>, B is <4 x float>, and the accumulator/result are float /
; <2 x float> / <4 x float>.
; NOTE(review): 768 = 0x200 | 0x100, presumably MatrixBTF32INTEL |
; MatrixATF32INTEL -- confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate operand table.
; NOTE(review): the mangled names of the 2nd and 3rd callees appear to encode A
; as a 2- and 4-element float vector, while the IR passes float and
; <2 x float>; presumably only the name prefix matters to the resolver --
; confirm against the declarations elsewhere in this file.
; %resF, %a1 and %cF are declared but never referenced in the body.
define spir_kernel void @test_v13(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
float %a1, float %a2, float %a4, <2 x float> %a8,
<4 x float> %b,
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
entry:
; Expected IR: one GenISA sub-group dpas call per builtin call. B is expected
; as a <4 x i32> value matched by a wildcard rather than %b itself (presumably
; a cast of %b inserted by the pass). Trailing constants are (10, 10, 8, N),
; N = 2, 4, 8 -- presumably (precision A, precision B, systolic depth, repeat
; count); TODO confirm.
; CHECK-GENISA-LABEL: @test_v13(
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v4i32(float %c2F, float %a2, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false)
; CHECK-GENISA: store float [[DPAS1]], float* %res2
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v4i32(<2 x float> %c4F, float %a4, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false)
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v4i32(<4 x float> %c8F, <2 x float> %a8, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false)
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
; Expected vISA: dpas.tf32.tf32.8.N at execution size 16; destination and
; accumulator are declared with type=f (16*N elements), B as 128 dwords, and
; A as a ud variable aliased to an f variable of the same element count
; (32, 32 and 64 elements for N = 2, 4, 8).
; CHECK-VISAASM-LABEL: .kernel "test_v13"
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=f num_elts=32
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=f num_elts=64
; Input under test: the three builtin calls matched by the expectations above.
%call33 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv4_ffi(i32 8, float %a2, <4 x float> %b, float %c2F, i32 768)
store float %call33, float* %res2
%call34 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv4_fS_i(i32 8, float %a4, <4 x float> %b, <2 x float> %c4F, i32 768)
store <2 x float> %call34, <2 x float>* %res4
%call35 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fS_S_i(i32 8, <2 x float> %a8, <4 x float> %b, <4 x float> %c8F, i32 768)
store <4 x float> %call35, <4 x float>* %res8
ret void
}
; Metadata node attached to the kernels above via !intel_reqd_sub_group_size:
; a single i32 operand with value 32.
!100 = !{i32 32}