mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-10-30 08:18:26 +08:00
fix test
igc_opt returns non-zero on failed assert
This commit is contained in:
committed by
igcbot
parent
46c11fc759
commit
b717c7c181
@ -107,6 +107,8 @@ private:
|
|||||||
///
|
///
|
||||||
static const StringRef SG_PREFIX_IDPAS16;
|
static const StringRef SG_PREFIX_IDPAS16;
|
||||||
static const StringRef SG_PREFIX_FDPAS16;
|
static const StringRef SG_PREFIX_FDPAS16;
|
||||||
|
static const StringRef SG_PREFIX_IDPAS32N16;
|
||||||
|
static const StringRef SG_PREFIX_FDPAS32N16;
|
||||||
// PVC+: pure hf/bf dpas builtins
|
// PVC+: pure hf/bf dpas builtins
|
||||||
static const StringRef WI_PREFIX_HFDPAS;
|
static const StringRef WI_PREFIX_HFDPAS;
|
||||||
static const StringRef WI_PREFIX_BFDPAS;
|
static const StringRef WI_PREFIX_BFDPAS;
|
||||||
@ -200,6 +202,8 @@ const StringRef DpasFuncsResolution::WI_PREFIX_IDPAS = "__builtin_IB_idpas";
|
|||||||
const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = "__builtin_IB_fdpas";
|
const StringRef DpasFuncsResolution::WI_PREFIX_FDPAS = "__builtin_IB_fdpas";
|
||||||
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = "__builtin_IB_sub_group16_idpas";
|
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS16 = "__builtin_IB_sub_group16_idpas";
|
||||||
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = "__builtin_IB_sub_group16_fdpas";
|
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS16 = "__builtin_IB_sub_group16_fdpas";
|
||||||
|
const StringRef DpasFuncsResolution::SG_PREFIX_IDPAS32N16 = "__builtin_IB_sub_group32n16_idpas";
|
||||||
|
const StringRef DpasFuncsResolution::SG_PREFIX_FDPAS32N16 = "__builtin_IB_sub_group32n16_fdpas";
|
||||||
// PVC+: pure hf/bf dpas builtins
|
// PVC+: pure hf/bf dpas builtins
|
||||||
const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = "__builtin_IB_hfdpas";
|
const StringRef DpasFuncsResolution::WI_PREFIX_HFDPAS = "__builtin_IB_hfdpas";
|
||||||
const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = "__builtin_IB_bfdpas";
|
const StringRef DpasFuncsResolution::WI_PREFIX_BFDPAS = "__builtin_IB_bfdpas";
|
||||||
@ -263,6 +267,11 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
|
|||||||
|
|
||||||
bool IsDpasw = false;
|
bool IsDpasw = false;
|
||||||
bool IsIDpas = false;
|
bool IsIDpas = false;
|
||||||
|
// Dimension N is platform specific and is directly correlated to minimum subgroup-size for
|
||||||
|
// given platform. If DPAS with the same M, N, K dimensions is executed within a subgroup
|
||||||
|
// twice the size of minimum subgroup-size, each work item must contain half of the data
|
||||||
|
// compared to the minimum subgroup-size.
|
||||||
|
bool IsDoubleSubgroup = false;
|
||||||
int DstTy, AccTy, PA, PB, SD, RC;
|
int DstTy, AccTy, PA, PB, SD, RC;
|
||||||
GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic;
|
GenISAIntrinsic::ID iid = GenISAIntrinsic::no_intrinsic;
|
||||||
bool doVerify = false;
|
bool doVerify = false;
|
||||||
@ -277,12 +286,26 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
|
|||||||
if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
||||||
return;
|
return;
|
||||||
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
||||||
|
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_IDPAS32N16)) {
|
||||||
|
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_IDPAS32N16.size();
|
||||||
|
IsIDpas = true;
|
||||||
|
IsDoubleSubgroup = true;
|
||||||
|
if (!demangleSuffix(funcName, SG_PREFIX_LEN, false, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
||||||
|
return;
|
||||||
|
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
||||||
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS16)) {
|
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS16)) {
|
||||||
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size();
|
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS16.size();
|
||||||
IsIDpas = false;
|
IsIDpas = false;
|
||||||
if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
||||||
return;
|
return;
|
||||||
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
||||||
|
} else if (funcName.startswith(DpasFuncsResolution::SG_PREFIX_FDPAS32N16)) {
|
||||||
|
const int SG_PREFIX_LEN = DpasFuncsResolution::SG_PREFIX_FDPAS32N16.size();
|
||||||
|
IsIDpas = false;
|
||||||
|
IsDoubleSubgroup = true;
|
||||||
|
if (!demangleSuffix(funcName, SG_PREFIX_LEN, true, IsIDpas, DstTy, AccTy, PA, PB, SD, RC, nullptr))
|
||||||
|
return;
|
||||||
|
iid = GenISAIntrinsic::GenISA_sub_group_dpas;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return;
|
return;
|
||||||
@ -363,6 +386,14 @@ void DpasFuncsResolution::visitCallInst(CallInst &CI) {
|
|||||||
Type *A_BaseTy = ATy->getScalarType();
|
Type *A_BaseTy = ATy->getScalarType();
|
||||||
Type *B_BaseTy = BTy->getScalarType();
|
Type *B_BaseTy = BTy->getScalarType();
|
||||||
|
|
||||||
|
if (IsDoubleSubgroup) {
|
||||||
|
IGC_ASSERT_MESSAGE(RC >= 2, "ICE: repeat count of DPAS for double subgroup-size must be >= 2!");
|
||||||
|
D_nelts *= 2;
|
||||||
|
ACC_nelts *= 2;
|
||||||
|
A_nelts *= 2;
|
||||||
|
B_nelts *= 2;
|
||||||
|
}
|
||||||
|
|
||||||
if (IsIDpas) {
|
if (IsIDpas) {
|
||||||
uint32_t Abits = getPrecisionInBits((PrecisionType)PA);
|
uint32_t Abits = getPrecisionInBits((PrecisionType)PA);
|
||||||
uint32_t Bbits = getPrecisionInBits((PrecisionType)PB);
|
uint32_t Bbits = getPrecisionInBits((PrecisionType)PB);
|
||||||
|
|||||||
@ -272,7 +272,7 @@ int SpvSubgroupMMAResolution::getElemCount(const Type *Ty) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands,
|
bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands,
|
||||||
const CallInst &CI) {
|
CallInst &CI) {
|
||||||
if (M != 1 && M != 2 && M != 4 && M != 8) {
|
if (M != 1 && M != 2 && M != 4 && M != 8) {
|
||||||
emitError(
|
emitError(
|
||||||
"__spirv_SubgroupMatrixMultiplyAccumulateINTEL: M dimension must be 1, 2, 4 or 8 for targeted HW. Actual: " +
|
"__spirv_SubgroupMatrixMultiplyAccumulateINTEL: M dimension must be 1, 2, 4 or 8 for targeted HW. Actual: " +
|
||||||
@ -295,16 +295,27 @@ bool SpvSubgroupMMAResolution::validateElemCounts(int M, int AElemCount, int BEl
|
|||||||
CI);
|
CI);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (BElemCount != 8) {
|
const int expectedBCount = isDoubleSubgroup(CI) ? 4 : 8;
|
||||||
emitError("__spirv_SubgroupMatrixMultiplyAccumulateINTEL: Matrix B argument must have 8 components for targeted "
|
if (BElemCount != expectedBCount) {
|
||||||
"HW. Actual: " +
|
emitError("__spirv_SubgroupMatrixMultiplyAccumulateINTEL: Matrix B argument must have " +
|
||||||
std::to_string(BElemCount),
|
std::to_string(expectedBCount) +
|
||||||
|
" components for targeted HW. Actual: " + std::to_string(BElemCount),
|
||||||
CI);
|
CI);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dimension N is platform specific and is directly correlated to minimum subgroup-size for
|
||||||
|
// given platform. If DPAS with the same M, N, K dimensions is executed within a subgroup
|
||||||
|
// twice the size of minimum subgroup-size, each work item must contain half of the data
|
||||||
|
// compared to the minimum subgroup-size.
|
||||||
|
bool SpvSubgroupMMAResolution::isDoubleSubgroup(CallInst &CI) {
|
||||||
|
if (!m_Ctx->platform.hasExecSize16DPAS())
|
||||||
|
return false;
|
||||||
|
return IGC::getSIMDSize(getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils(), CI.getParent()->getParent()) == 32;
|
||||||
|
}
|
||||||
|
|
||||||
SpvSubgroupMMAResolution::SupportedTable *SpvSubgroupMMAResolution::getSupportedTable() {
|
SpvSubgroupMMAResolution::SupportedTable *SpvSubgroupMMAResolution::getSupportedTable() {
|
||||||
if (m_Ctx->platform.hasExecSize16DPAS()) {
|
if (m_Ctx->platform.hasExecSize16DPAS()) {
|
||||||
if (m_Simd16Table.empty())
|
if (m_Simd16Table.empty())
|
||||||
@ -480,9 +491,16 @@ void SpvSubgroupMMAResolution::visitCallInst(CallInst &CI) {
|
|||||||
SmallVector<Type *, 3> argTypes({c->getType(), a->getType(), b->getType()});
|
SmallVector<Type *, 3> argTypes({c->getType(), a->getType(), b->getType()});
|
||||||
FunctionType *FT = FunctionType::get(CI.getType(), argTypes, false);
|
FunctionType *FT = FunctionType::get(CI.getType(), argTypes, false);
|
||||||
|
|
||||||
|
std::string subgroupSize;
|
||||||
|
if (isDoubleSubgroup(CI)) {
|
||||||
|
subgroupSize = "32n16";
|
||||||
|
M *= 2;
|
||||||
|
} else {
|
||||||
|
subgroupSize = m_Ctx->platform.hasExecSize16DPAS() ? "16" : "";
|
||||||
|
}
|
||||||
|
|
||||||
std::stringstream newFuncName;
|
std::stringstream newFuncName;
|
||||||
newFuncName << "__builtin_IB_sub_group";
|
newFuncName << "__builtin_IB_sub_group" << subgroupSize;
|
||||||
newFuncName << (m_Ctx->platform.hasExecSize16DPAS() ? "16" : "");
|
|
||||||
newFuncName << "_" << (ResultElemTy == I32 ? "i" : "f");
|
newFuncName << "_" << (ResultElemTy == I32 ? "i" : "f");
|
||||||
newFuncName << "dpas_" << OperandsIt->second.str() << "8_" << M;
|
newFuncName << "dpas_" << OperandsIt->second.str() << "8_" << M;
|
||||||
|
|
||||||
|
|||||||
@ -72,7 +72,7 @@ private:
|
|||||||
bool validateI32Constant(const llvm::Value *V, const llvm::Twine &ParamName, const llvm::CallInst &CI);
|
bool validateI32Constant(const llvm::Value *V, const llvm::Twine &ParamName, const llvm::CallInst &CI);
|
||||||
bool validateCType(const llvm::Type *ResultTy, const llvm::Type *CType, const llvm::CallInst &CI);
|
bool validateCType(const llvm::Type *ResultTy, const llvm::Type *CType, const llvm::CallInst &CI);
|
||||||
bool validateElementType(const ElType ElemTy, llvm::StringRef ParamName, const llvm::CallInst &CI);
|
bool validateElementType(const ElType ElemTy, llvm::StringRef ParamName, const llvm::CallInst &CI);
|
||||||
bool validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands, const llvm::CallInst &CI);
|
bool validateElemCounts(int M, int AElemCount, int BElemCount, uint32_t Operands, llvm::CallInst &CI);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
bool validateKDimInTable(const T KIt, int K, const SupportedTable *table, const llvm::CallInst &CI);
|
bool validateKDimInTable(const T KIt, int K, const SupportedTable *table, const llvm::CallInst &CI);
|
||||||
@ -89,6 +89,8 @@ private:
|
|||||||
bool validateOperands(const T OpIt, int K, ElType ResultElemTy, ElType AElemTy, ElType BElemTy, uint32_t Operands,
|
bool validateOperands(const T OpIt, int K, ElType ResultElemTy, ElType AElemTy, ElType BElemTy, uint32_t Operands,
|
||||||
const OperandsTable &operandMap, const llvm::CallInst &CI);
|
const OperandsTable &operandMap, const llvm::CallInst &CI);
|
||||||
|
|
||||||
|
bool isDoubleSubgroup(llvm::CallInst &CI);
|
||||||
|
|
||||||
llvm::DenseSet<llvm::Function *> m_BuiltinsToRemove;
|
llvm::DenseSet<llvm::Function *> m_BuiltinsToRemove;
|
||||||
bool m_Changed = false;
|
bool m_Changed = false;
|
||||||
IGC::CodeGenContext *m_Ctx = nullptr;
|
IGC::CodeGenContext *m_Ctx = nullptr;
|
||||||
|
|||||||
26
IGC/Compiler/tests/DpasFuncsResolution/dpas-pvc-simd32.ll
Normal file
26
IGC/Compiler/tests/DpasFuncsResolution/dpas-pvc-simd32.ll
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
;=========================== begin_copyright_notice ============================
|
||||||
|
;
|
||||||
|
; Copyright (C) 2025 Intel Corporation
|
||||||
|
;
|
||||||
|
; SPDX-License-Identifier: MIT
|
||||||
|
;
|
||||||
|
;============================ end_copyright_notice =============================
|
||||||
|
; UNSUPPORTED: system-windows
|
||||||
|
; REQUIRES: debug, llvm-14-plus
|
||||||
|
; RUN: not igc_opt --opaque-pointers -platformpvc --igc-arith-funcs-translation -S < %s 2>&1 | FileCheck %s
|
||||||
|
; ------------------------------------------------
|
||||||
|
; DpasFuncsResolution
|
||||||
|
; ------------------------------------------------
|
||||||
|
|
||||||
|
; Check assertion unique to DPAS in double-subgroup size.
|
||||||
|
|
||||||
|
; CHECK: RC >= 2, ICE: repeat count of DPAS for double subgroup-size must be >= 2!
|
||||||
|
|
||||||
|
define spir_kernel void @test_dpas(<4 x i32> %src, i32 %src2, ptr %dst) {
|
||||||
|
%1 = load i16, ptr %dst, align 4
|
||||||
|
%2 = call i32 @__builtin_IB_sub_group32n16_idpas_s8_s8_8_1(i32 %src2, i16 %1, <4 x i32> %src)
|
||||||
|
store i32 %2, ptr %dst, align 4
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare i32 @__builtin_IB_sub_group32n16_idpas_s8_s8_8_1(i32, i16, <4 x i32>)
|
||||||
@ -10,7 +10,8 @@
|
|||||||
|
|
||||||
; RUN: llvm-as %s -o %t.bc
|
; RUN: llvm-as %s -o %t.bc
|
||||||
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv
|
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv
|
||||||
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s
|
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s --check-prefix=CHECK-GENISA
|
||||||
|
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'DumpVISAASMToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-VISAASM
|
||||||
|
|
||||||
target triple = "spir64-unknown-unknown"
|
target triple = "spir64-unknown-unknown"
|
||||||
|
|
||||||
@ -45,15 +46,41 @@ define spir_kernel void @test_v1(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v1(
|
; CHECK-GENISA-LABEL: @test_v1(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 4, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 4, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v1"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call0 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 51)
|
%call0 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 51)
|
||||||
store i32 %call0, i32* %res1I32
|
store i32 %call0, i32* %res1I32
|
||||||
@ -72,15 +99,41 @@ define spir_kernel void @test_v2(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v2(
|
; CHECK-GENISA-LABEL: @test_v2(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 1, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 4, i32 1, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v2"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call4 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 49)
|
%call4 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 49)
|
||||||
store i32 %call4, i32* %res1I32
|
store i32 %call4, i32* %res1I32
|
||||||
@ -99,15 +152,41 @@ define spir_kernel void @test_v3(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v3(
|
; CHECK-GENISA-LABEL: @test_v3(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 4, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 4, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v3"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call8 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 50)
|
%call8 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 50)
|
||||||
store i32 %call8, i32* %res1I32
|
store i32 %call8, i32* %res1I32
|
||||||
@ -126,15 +205,41 @@ define spir_kernel void @test_v4(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v4(
|
; CHECK-GENISA-LABEL: @test_v4(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 1, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 1, i32 1, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v4"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call12 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 48)
|
%call12 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 32, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 48)
|
||||||
store i32 %call12, i32* %res1I32
|
store i32 %call12, i32* %res1I32
|
||||||
@ -154,15 +259,41 @@ define spir_kernel void @test_v5(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v5(
|
; CHECK-GENISA-LABEL: @test_v5(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 5, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 5, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v5"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call16 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 195)
|
%call16 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 195)
|
||||||
store i32 %call16, i32* %res1I32
|
store i32 %call16, i32* %res1I32
|
||||||
@ -181,15 +312,41 @@ define spir_kernel void @test_v6(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v6(
|
; CHECK-GENISA-LABEL: @test_v6(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 2, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 5, i32 2, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v6"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call20 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 193)
|
%call20 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 193)
|
||||||
store i32 %call20, i32* %res1I32
|
store i32 %call20, i32* %res1I32
|
||||||
@ -208,15 +365,41 @@ define spir_kernel void @test_v7(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v7(
|
; CHECK-GENISA-LABEL: @test_v7(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 5, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 5, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v7"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call24 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 194)
|
%call24 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 194)
|
||||||
store i32 %call24, i32* %res1I32
|
store i32 %call24, i32* %res1I32
|
||||||
@ -230,20 +413,47 @@ entry:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; int4 matrix sources, int32 accumulator:
|
||||||
define spir_kernel void @test_v8(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>* %res4I32, <8 x i32>* %res8I32,
|
define spir_kernel void @test_v8(i32* %res1I32, <2 x i32>* %res2I32, <4 x i32>* %res4I32, <8 x i32>* %res8I32,
|
||||||
i16 %a1, <2 x i16> %a2, <4 x i16> %a4, <8 x i16> %a8,
|
i16 %a1, <2 x i16> %a2, <4 x i16> %a4, <8 x i16> %a8,
|
||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
i32 %c1I32, <2 x i32> %c2I32, <4 x i32> %c4I32, <8 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v8(
|
; CHECK-GENISA-LABEL: @test_v8(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 2, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v8i32(i32 %c1I32, i16 %a1, <8 x i32> %b, i32 2, i32 2, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i32 [[DPAS]], i32* %res1I32
|
; CHECK-GENISA: store i32 [[DPAS]], i32* %res1I32
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v8i32(<2 x i32> %c2I32, <2 x i16> %a2, <8 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
; CHECK-GENISA: store <2 x i32> [[DPAS1]], <2 x i32>* %res2I32
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v8i32(<4 x i32> %c4I32, <4 x i16> %a4, <8 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
; CHECK-GENISA: store <4 x i32> [[DPAS2]], <4 x i32>* %res4I32
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i32> @llvm.genx.GenISA.sub.group.dpas.v8i32.v8i32.v8i16.v8i32(<8 x i32> %c8I32, <8 x i16> %a8, <8 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
; CHECK-GENISA: store <8 x i32> [[DPAS3]], <8 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v8"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=d num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call28 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 192)
|
%call28 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 64, i16 %a1, <8 x i32> %b, i32 %c1I32, i32 192)
|
||||||
store i32 %call28, i32* %res1I32
|
store i32 %call28, i32* %res1I32
|
||||||
@ -263,15 +473,41 @@ define spir_kernel void @test_v9(float* %resF, <2 x float>* %res2, <4 x float>*
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v9(
|
; CHECK-GENISA-LABEL: @test_v9(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store float [[DPAS]], float* %resF
|
; CHECK-GENISA: store float [[DPAS]], float* %resF
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v9"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 3072)
|
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 3072)
|
||||||
store float %call32, float* %resF
|
store float %call32, float* %resF
|
||||||
@ -291,15 +527,41 @@ define spir_kernel void @test_v10(float* %resF, <2 x float>* %res2, <4 x float>
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v10(
|
; CHECK-GENISA-LABEL: @test_v10(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v8i32(float %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store float [[DPAS]], float* %resF
|
; CHECK-GENISA: store float [[DPAS]], float* %resF
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v8i32(<2 x float> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v8i32(<4 x float> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v8i16.v8i32(<8 x float> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v10"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call36 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 12288)
|
%call36 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 16, i16 %a1, <8 x i32> %b, float %cF, i32 12288)
|
||||||
store float %call36, float* %resF
|
store float %call36, float* %resF
|
||||||
@ -319,15 +581,41 @@ define spir_kernel void @test_v11(half* %res, <2 x half>* %res2, <4 x half>* %r
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
half %c, <2 x half> %c2, <4 x half> %c4, <8 x half> %c8) !intel_reqd_sub_group_size !100 {
|
half %c, <2 x half> %c2, <4 x half> %c4, <8 x half> %c8) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v11(
|
; CHECK-GENISA-LABEL: @test_v11(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v8i32(half %c, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v8i32(half %c, i16 %a1, <8 x i32> %b, i32 12, i32 12, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store half [[DPAS]], half* %res
|
; CHECK-GENISA: store half [[DPAS]], half* %res
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v8i32(<2 x half> %c2, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v8i32(<2 x half> %c2, <2 x i16> %a2, <8 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x half> [[DPAS1]], <2 x half>* %res2
|
; CHECK-GENISA: store <2 x half> [[DPAS1]], <2 x half>* %res2
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v8i32(<4 x half> %c4, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v8i32(<4 x half> %c4, <4 x i16> %a4, <8 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x half> [[DPAS2]], <4 x half>* %res4
|
; CHECK-GENISA: store <4 x half> [[DPAS2]], <4 x half>* %res4
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f16.v8f16.v8i16.v8i32(<8 x half> %c8, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x half> @llvm.genx.GenISA.sub.group.dpas.v8f16.v8f16.v8i16.v8i32(<8 x half> %c8, <8 x i16> %a8, <8 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x half> [[DPAS3]], <8 x half>* %res8
|
; CHECK-GENISA: store <8 x half> [[DPAS3]], <8 x half>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v11"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=hf num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=hf num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=hf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=hf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=hf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=hf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=hf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=hf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call32 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32 16, i16 %a1, <8 x i32> %b, half %c, i32 3072)
|
%call32 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32 16, i16 %a1, <8 x i32> %b, half %c, i32 3072)
|
||||||
store half %call32, half* %res
|
store half %call32, half* %res
|
||||||
@ -347,15 +635,41 @@ define spir_kernel void @test_v12(i16* %res, <2 x i16>* %res2, <4 x i16>* %res4,
|
|||||||
<8 x i32> %b,
|
<8 x i32> %b,
|
||||||
i16 %cF, <2 x i16> %c2F, <4 x i16> %c4F, <8 x i16> %c8F) !intel_reqd_sub_group_size !100 {
|
i16 %cF, <2 x i16> %c2F, <4 x i16> %c4F, <8 x i16> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v12(
|
; CHECK-GENISA-LABEL: @test_v12(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v8i32(i16 %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v8i32(i16 %cF, i16 %a1, <8 x i32> %b, i32 11, i32 11, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store i16 [[DPAS]], i16* %res
|
; CHECK-GENISA: store i16 [[DPAS]], i16* %res
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v8i32(<2 x i16> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v8i32(<2 x i16> %c2F, <2 x i16> %a2, <8 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x i16> [[DPAS1]], <2 x i16>* %res2
|
; CHECK-GENISA: store <2 x i16> [[DPAS1]], <2 x i16>* %res2
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v8i32(<4 x i16> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v8i32(<4 x i16> %c4F, <4 x i16> %a4, <8 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x i16> [[DPAS2]], <4 x i16>* %res4
|
; CHECK-GENISA: store <4 x i16> [[DPAS2]], <4 x i16>* %res4
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x i16> @llvm.genx.GenISA.sub.group.dpas.v8i16.v8i16.v8i16.v8i32(<8 x i16> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x i16> @llvm.genx.GenISA.sub.group.dpas.v8i16.v8i16.v8i16.v8i32(<8 x i16> %c8F, <8 x i16> %a8, <8 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x i16> [[DPAS3]], <8 x i16>* %res8
|
; CHECK-GENISA: store <8 x i16> [[DPAS3]], <8 x i16>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v12"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=bf num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=bf num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=8 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=w num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=bf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=bf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=bf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=bf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=bf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=bf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
%call36 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32 16, i16 %a1, <8 x i32> %b, i16 %cF, i32 12300)
|
%call36 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32 16, i16 %a1, <8 x i32> %b, i16 %cF, i32 12300)
|
||||||
store i16 %call36, i16* %res
|
store i16 %call36, i16* %res
|
||||||
@ -375,15 +689,41 @@ define spir_kernel void @test_v13(float* %resF, <2 x float>* %res2, <4 x float>
|
|||||||
<8 x float> %b,
|
<8 x float> %b,
|
||||||
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
float %cF, <2 x float> %c2F, <4 x float> %c4F, <8 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
entry:
|
entry:
|
||||||
; CHECK-LABEL: @test_v13(
|
; CHECK-GENISA-LABEL: @test_v13(
|
||||||
; CHECK: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v8i32(float %cF, float %a1, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 1, i1 false)
|
; CHECK-GENISA: [[DPAS:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v8i32(float %cF, float %a1, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 1, i1 false)
|
||||||
; CHECK: store float [[DPAS]], float* %resF
|
; CHECK-GENISA: store float [[DPAS]], float* %resF
|
||||||
; CHECK: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v8i32(<2 x float> %c2F, float %a2, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false)
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v8i32(<2 x float> %c2F, float %a2, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false)
|
||||||
; CHECK: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
; CHECK-GENISA: store <2 x float> [[DPAS1]], <2 x float>* %res2
|
||||||
; CHECK: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v8i32(<4 x float> %c4F, <2 x float> %a4, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false)
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v8i32(<4 x float> %c4F, <2 x float> %a4, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false)
|
||||||
; CHECK: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
; CHECK-GENISA: store <4 x float> [[DPAS2]], <4 x float>* %res4
|
||||||
; CHECK: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v4f32.v8i32(<8 x float> %c8F, <4 x float> %a8, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false)
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <8 x float> @llvm.genx.GenISA.sub.group.dpas.v8f32.v8f32.v4f32.v8i32(<8 x float> %c8F, <4 x float> %a8, <8 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false)
|
||||||
; CHECK: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
; CHECK-GENISA: store <8 x float> [[DPAS3]], <8 x float>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v13"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.1 (M1, 16) [[D1:[A-z0-9_]*]].0 [[C1:[A-z0-9_]*]].0 [[B1:[A-z0-9_]*]].0 [[A1:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C1]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B1]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A1_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A1_ALIAS]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=f num_elts=16
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=f num_elts=64
|
||||||
|
|
||||||
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32 8, float %a1, <8 x float> %b, float %cF, i32 768)
|
%call32 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32 8, float %a1, <8 x float> %b, float %cF, i32 768)
|
||||||
store float %call32, float* %resF
|
store float %call32, float* %resF
|
||||||
|
|||||||
@ -0,0 +1,610 @@
|
|||||||
|
;=========================== begin_copyright_notice ============================
|
||||||
|
;
|
||||||
|
; Copyright (C) 2025 Intel Corporation
|
||||||
|
;
|
||||||
|
; SPDX-License-Identifier: MIT
|
||||||
|
;
|
||||||
|
;============================ end_copyright_notice =============================
|
||||||
|
|
||||||
|
; REQUIRES: llvm-spirv, pvc-supported
|
||||||
|
|
||||||
|
; RUN: llvm-as %s -o %t.bc
|
||||||
|
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate -o %t.spv
|
||||||
|
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'PrintToConsole=1 PrintAfter=ArithmeticFuncsTranslation'" 2>&1 | FileCheck %s --check-prefix=CHECK-GENISA
|
||||||
|
; RUN: ocloc compile -spirv_input -file %t.spv -device pvc -options " -igc_opts 'DumpVISAASMToConsole=1'" 2>&1 | FileCheck %s --check-prefix=CHECK-VISAASM
|
||||||
|
|
||||||
|
; Tests DPAS called from subgroup-size=32 kernels.
|
||||||
|
; Supported dimensions M, N, K are exactly the same as for subgroup-size=16; they don't change with subgroup-size.
|
||||||
|
; To correctly map 32 work-items, each work-item holds half as much data as it does with subgroup-size=16.
|
||||||
|
;
|
||||||
|
; This test is a copy of the SIMD16 test (dpas_pvc.ll) with modified GenISA checks. The vISA ASM checks are exactly the same.
|
||||||
|
|
||||||
|
target triple = "spir64-unknown-unknown"
|
||||||
|
|
||||||
|
declare spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32, i16 signext, <4 x i32>, i32, i32)
|
||||||
|
declare spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32, <2 x i16>, <4 x i32>, <2 x i32>, i32)
|
||||||
|
declare spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32, <4 x i16>, <4 x i32>, <4 x i32>, i32)
|
||||||
|
|
||||||
|
declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32, i16 signext, <4 x i32>, float, i32)
|
||||||
|
declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32, <2 x i16>, <4 x i32>, <2 x float>, i32)
|
||||||
|
declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32, <4 x i16>, <4 x i32>, <4 x float>, i32)
|
||||||
|
|
||||||
|
declare spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iDhi(i32, i16 signext, <4 x i32>, half, i32)
|
||||||
|
declare spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_Dhi(i32, <2 x i16>, <4 x i32>, <2 x half>, i32)
|
||||||
|
declare spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_Dhi(i32, <4 x i16>, <4 x i32>, <4 x half>, i32)
|
||||||
|
|
||||||
|
declare spir_func signext i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_isi(i32, i16 signext, <4 x i32>, i16 signext, i32)
|
||||||
|
declare spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iS_i(i32, <2 x i16>, <4 x i32>, <2 x i16>, i32)
|
||||||
|
declare spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS_i(i32, <4 x i16>, <4 x i32>, <4 x i16>, i32)
|
||||||
|
|
||||||
|
declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv4_ffi(i32, float, <4 x float>, float, i32)
|
||||||
|
declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv4_fS_i(i32, float, <4 x float>, <2 x float>, i32)
|
||||||
|
declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fS_S_i(i32, <2 x float>, <4 x float>, <4 x float>, i32)
|
||||||
|
|
||||||
|
; 8-bit integer matrix sources (signed and unsigned), 32-bit integer accumulator:
|
||||||
|
define spir_kernel void @test_v1(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v1(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 4, i32 4, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 4, i32 4, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 4, i32 4, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v1"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call1 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 51)
|
||||||
|
store i32 %call1, i32* %res2I32
|
||||||
|
%call2 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 51)
|
||||||
|
store <2 x i32> %call2, <2 x i32>* %res4I32
|
||||||
|
%call3 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 51)
|
||||||
|
store <4 x i32> %call3, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define spir_kernel void @test_v2(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v2(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 4, i32 1, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 4, i32 1, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 4, i32 1, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v2"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.s8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call5 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 49)
|
||||||
|
store i32 %call5, i32* %res2I32
|
||||||
|
%call6 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 49)
|
||||||
|
store <2 x i32> %call6, <2 x i32>* %res4I32
|
||||||
|
%call7 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 49)
|
||||||
|
store <4 x i32> %call7, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; test_v3: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 32 argument and operands mask 50, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 1, i32 4, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.s8.u8.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
; A vISA label check for this kernel is included so that the DAG checks below are
; confined to this kernel's section, as in the sibling test_vN kernels.
define spir_kernel void @test_v3(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v3(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 1, i32 4, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 1, i32 4, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 1, i32 4, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v3"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call9 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 50)
|
||||||
|
store i32 %call9, i32* %res2I32
|
||||||
|
%call10 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 50)
|
||||||
|
store <2 x i32> %call10, <2 x i32>* %res4I32
|
||||||
|
%call11 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 50)
|
||||||
|
store <4 x i32> %call11, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; test_v4: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 32 argument and operands mask 48, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 1, i32 1, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.u8.u8.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
define spir_kernel void @test_v4(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v4(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 1, i32 1, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 1, i32 1, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 1, i32 1, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v4"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u8.u8.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call13 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 32, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 48)
|
||||||
|
store i32 %call13, i32* %res2I32
|
||||||
|
%call14 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 32, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 48)
|
||||||
|
store <2 x i32> %call14, <2 x i32>* %res4I32
|
||||||
|
%call15 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 32, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 48)
|
||||||
|
store <4 x i32> %call15, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; 4-bit integer matrix sources (signed and unsigned), 32-bit integer accumulator:
|
||||||
|
; test_v5: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 64 argument and operands mask 195, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 5, i32 5, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.s4.s4.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
define spir_kernel void @test_v5(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v5(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 5, i32 5, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 5, i32 5, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 5, i32 5, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v5"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call17 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 195)
|
||||||
|
store i32 %call17, i32* %res2I32
|
||||||
|
%call18 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 195)
|
||||||
|
store <2 x i32> %call18, <2 x i32>* %res4I32
|
||||||
|
%call19 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 195)
|
||||||
|
store <4 x i32> %call19, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; test_v6: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 64 argument and operands mask 193, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 5, i32 2, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.u4.s4.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
define spir_kernel void @test_v6(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v6(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 5, i32 2, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 5, i32 2, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 5, i32 2, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v6"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.s4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call21 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 193)
|
||||||
|
store i32 %call21, i32* %res2I32
|
||||||
|
%call22 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 193)
|
||||||
|
store <2 x i32> %call22, <2 x i32>* %res4I32
|
||||||
|
%call23 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 193)
|
||||||
|
store <4 x i32> %call23, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; test_v7: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 64 argument and operands mask 194, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 2, i32 5, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.s4.u4.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
define spir_kernel void @test_v7(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v7(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 2, i32 5, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 2, i32 5, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 2, i32 5, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v7"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.s4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call25 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 194)
|
||||||
|
store i32 %call25, i32* %res2I32
|
||||||
|
%call26 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 194)
|
||||||
|
store <2 x i32> %call26, <2 x i32>* %res4I32
|
||||||
|
%call27 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 194)
|
||||||
|
store <4 x i32> %call27, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; unsigned 4-bit integer (int4) matrix sources, 32-bit integer accumulator:
|
||||||
|
; test_v8: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 64 argument and operands mask 192, using a scalar i32, a <2 x i32> and a
; <4 x i32> accumulator (%c2I32, %c4I32, %c8I32), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 2, i32 2, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.u4.u4.8.{2,4,8} instructions plus the .decl lines of their operands.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res1I32, %a1 and %c1I32 are declared but not referenced in this kernel.
define spir_kernel void @test_v8(i32* %res1I32, i32* %res2I32, <2 x i32>* %res4I32, <4 x i32>* %res8I32,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i32 %c1I32, i32 %c2I32, <2 x i32> %c4I32, <4 x i32> %c8I32) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v8(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i32 @llvm.genx.GenISA.sub.group.dpas.i32.i32.i16.v4i32(i32 %c2I32, i16 %a2, <4 x i32> %b, i32 2, i32 2, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i32 [[DPAS1]], i32* %res2I32
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i32> @llvm.genx.GenISA.sub.group.dpas.v2i32.v2i32.v2i16.v4i32(<2 x i32> %c4I32, <2 x i16> %a4, <4 x i32> %b, i32 2, i32 2, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i32> [[DPAS2]], <2 x i32>* %res4I32
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i32> @llvm.genx.GenISA.sub.group.dpas.v4i32.v4i32.v4i16.v4i32(<4 x i32> %c8I32, <4 x i16> %a8, <4 x i32> %b, i32 2, i32 2, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i32> [[DPAS3]], <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v8"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=d num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=d num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.u4.u4.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call29 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iii(i32 64, i16 %a2, <4 x i32> %b, i32 %c2I32, i32 192)
|
||||||
|
store i32 %call29, i32* %res2I32
|
||||||
|
%call30 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_ii(i32 64, <2 x i16> %a4, <4 x i32> %b, <2 x i32> %c4I32, i32 192)
|
||||||
|
store <2 x i32> %call30, <2 x i32>* %res4I32
|
||||||
|
%call31 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS0_i(i32 64, <4 x i16> %a8, <4 x i32> %b, <4 x i32> %c8I32, i32 192)
|
||||||
|
store <4 x i32> %call31, <4 x i32>* %res8I32
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; fp16 matrix sources, fp32 accumulator:
|
||||||
|
; test_v9: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 16 argument and operands mask 3072, using a scalar float, a <2 x float> and
; a <4 x float> accumulator (%c2F, %c4F, %c8F), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 12, i32 12, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.hf.hf.8.{2,4,8} instructions with type=f destination/accumulator
; .decl lines.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %resF, %a1 and %cF are declared but not referenced in this kernel.
define spir_kernel void @test_v9(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v9(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v4i32(float %c2F, i16 %a2, <4 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store float [[DPAS1]], float* %res2
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v4i32(<2 x float> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v9"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call33 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32 16, i16 %a2, <4 x i32> %b, float %c2F, i32 3072)
|
||||||
|
store float %call33, float* %res2
|
||||||
|
%call34 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x float> %c4F, i32 3072)
|
||||||
|
store <2 x float> %call34, <2 x float>* %res4
|
||||||
|
%call35 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x float> %c8F, i32 3072)
|
||||||
|
store <4 x float> %call35, <4 x float>* %res8
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; bf16 matrix sources, fp32 accumulator:
|
||||||
|
; test_v10: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 16 argument and operands mask 12288, using a scalar float, a <2 x float> and
; a <4 x float> accumulator (%c2F, %c4F, %c8F), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 11, i32 11, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.bf.bf.8.{2,4,8} instructions with type=f destination/accumulator
; .decl lines.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %resF, %a1 and %cF are declared but not referenced in this kernel.
define spir_kernel void @test_v10(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v10(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.i16.v4i32(float %c2F, i16 %a2, <4 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store float [[DPAS1]], float* %res2
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.v2i16.v4i32(<2 x float> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v4i16.v4i32(<4 x float> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v10"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call37 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_ifi(i32 16, i16 %a2, <4 x i32> %b, float %c2F, i32 12288)
|
||||||
|
store float %call37, float* %res2
|
||||||
|
%call38 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_fi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x float> %c4F, i32 12288)
|
||||||
|
store <2 x float> %call38, <2 x float>* %res4
|
||||||
|
%call39 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_fi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x float> %c8F, i32 12288)
|
||||||
|
store <4 x float> %call39, <4 x float>* %res8
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; fp16 matrix sources, fp16 accumulator:
|
||||||
|
; test_v11: calls SubgroupMatrixMultiplyAccumulateINTEL three times with a leading
; i32 16 argument and operands mask 3072, using a scalar half, a <2 x half> and
; a <4 x half> accumulator (%c2, %c4, %c8), and stores each result.
; The GenISA checks expect sub.group.dpas intrinsic calls whose trailing immediates
; are (i32 12, i32 12, i32 8, i32 N, i1 false) with N = 2, 4, 8; the vISA checks
; expect dpas.hf.hf.8.{2,4,8} instructions with type=hf destination/accumulator
; .decl lines.
; (The immediates are presumably source precisions, systolic depth and repeat
; count -- confirm against the GenISA dpas intrinsic definition.)
; Note: %res, %a1 and %c are declared but not referenced in this kernel.
define spir_kernel void @test_v11(half* %res, half* %res2, <2 x half>* %res4, <4 x half>* %res8,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
half %c, half %c2, <2 x half> %c4, <4 x half> %c8) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; CHECK-GENISA-LABEL: @test_v11(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call half @llvm.genx.GenISA.sub.group.dpas.f16.f16.i16.v4i32(half %c2, i16 %a2, <4 x i32> %b, i32 12, i32 12, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store half [[DPAS1]], half* %res2
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x half> @llvm.genx.GenISA.sub.group.dpas.v2f16.v2f16.v2i16.v4i32(<2 x half> %c4, <2 x i16> %a4, <4 x i32> %b, i32 12, i32 12, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x half> [[DPAS2]], <2 x half>* %res4
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x half> @llvm.genx.GenISA.sub.group.dpas.v4f16.v4f16.v4i16.v4i32(<4 x half> %c8, <4 x i16> %a8, <4 x i32> %b, i32 12, i32 12, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x half> [[DPAS3]], <4 x half>* %res8
|
||||||
|
|
||||||
|
; CHECK-VISAASM-LABEL: .kernel "test_v11"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=hf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=hf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=hf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=hf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.hf.hf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=hf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=hf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
%call33 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_iDhi(i32 16, i16 %a2, <4 x i32> %b, half %c2, i32 3072)
|
||||||
|
store half %call33, half* %res2
|
||||||
|
%call34 = call spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iDv2_Dhi(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x half> %c4, i32 3072)
|
||||||
|
store <2 x half> %call34, <2 x half>* %res4
|
||||||
|
%call35 = call spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iDv4_Dhi(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x half> %c8, i32 3072)
|
||||||
|
store <4 x half> %call35, <4 x half>* %res8
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; bf16 matrix sources, bf16 accumulator:
; Each of the three calls in this kernel passes i32 16 as its first argument and i32 12300 (0x300C)
; as its last argument, and stores the result to %res2, %res4 or %res8 respectively.
; The GENISA checks expect every call to be rewritten into llvm.genx.GenISA.sub.group.dpas with the
; accumulator moved to the first position, followed by A and B.
; NOTE(review): %res, %a1 and %cF are declared but never referenced in this kernel.
; NOTE(review): 0x300C presumably selects the packed-bf16 A/B and bf16 C/result operand bits;
; confirm against the SPV_INTEL_subgroup_matrix_multiply_accumulate specification.
|
||||||
|
define spir_kernel void @test_v12(i16* %res, i16* %res2, <2 x i16>* %res4, <4 x i16>* %res8,
|
||||||
|
i16 %a1, i16 %a2, <2 x i16> %a4, <4 x i16> %a8,
|
||||||
|
<4 x i32> %b,
|
||||||
|
i16 %cF, i16 %c2F, <2 x i16> %c4F, <4 x i16> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; Expected IR after resolution: the trailing constants are i32 11, i32 11, i32 8, then 2, 4 or 8,
; then i1 false (presumably two precision codes, a depth and a repeat count; confirm).
; CHECK-GENISA-LABEL: @test_v12(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call i16 @llvm.genx.GenISA.sub.group.dpas.i16.i16.i16.v4i32(i16 %c2F, i16 %a2, <4 x i32> %b, i32 11, i32 11, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store i16 [[DPAS1]], i16* %res2
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x i16> @llvm.genx.GenISA.sub.group.dpas.v2i16.v2i16.v2i16.v4i32(<2 x i16> %c4F, <2 x i16> %a4, <4 x i32> %b, i32 11, i32 11, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x i16> [[DPAS2]], <2 x i16>* %res4
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x i16> @llvm.genx.GenISA.sub.group.dpas.v4i16.v4i16.v4i16.v4i32(<4 x i16> %c8F, <4 x i16> %a8, <4 x i32> %b, i32 11, i32 11, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x i16> [[DPAS3]], <4 x i16>* %res8
|
||||||
|
|
||||||
|
; Expected vISA: one dpas.bf.bf.8.N instruction per call (N = 2, 4, 8) plus the .decl lines of its
; four operands. The -DAG directives allow these lines to match in any order.
; CHECK-VISAASM-LABEL: .kernel "test_v12"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=bf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=bf num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=16 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=w num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=bf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=bf num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=w num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: dpas.bf.bf.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=bf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=bf num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=w num_elts=128
|
||||||
|
|
||||||
|
; Calls under test: A and the accumulator are i16, <2 x i16> and <4 x i16>; B is always <4 x i32> %b.
%call37 = call spir_func i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv4_isi(i32 16, i16 %a2, <4 x i32> %b, i16 %c2F, i32 12300)
|
||||||
|
store i16 %call37, i16* %res2
|
||||||
|
%call38 = call spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv4_iS_i(i32 16, <2 x i16> %a4, <4 x i32> %b, <2 x i16> %c4F, i32 12300)
|
||||||
|
store <2 x i16> %call38, <2 x i16>* %res4
|
||||||
|
%call39 = call spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv4_iS_i(i32 16, <4 x i16> %a8, <4 x i32> %b, <4 x i16> %c8F, i32 12300)
|
||||||
|
store <4 x i16> %call39, <4 x i16>* %res8
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; tf32 matrix sources, fp32 accumulator:
; Each of the three calls in this kernel passes i32 8 as its first argument and i32 768 (0x300) as
; its last argument, and stores the result to %res2, %res4 or %res8 respectively.
; A is float for the first two calls and <2 x float> for the third; the accumulator is float,
; <2 x float> and <4 x float>.
; B is passed as <4 x float> %b, while the GENISA checks expect some <4 x i32> value matched with
; %{{.*}} (presumably a bitcast of %b inserted by the pass; confirm).
; NOTE(review): %resF, %a1 and %cF are declared but never referenced in this kernel.
; NOTE(review): 0x300 presumably selects the tf32 A/B operand bits; confirm against the
; SPV_INTEL_subgroup_matrix_multiply_accumulate specification.
|
||||||
|
define spir_kernel void @test_v13(float* %resF, float* %res2, <2 x float>* %res4, <4 x float>* %res8,
|
||||||
|
float %a1, float %a2, float %a4, <2 x float> %a8,
|
||||||
|
<4 x float> %b,
|
||||||
|
float %cF, float %c2F, <2 x float> %c4F, <4 x float> %c8F) !intel_reqd_sub_group_size !100 {
|
||||||
|
entry:
|
||||||
|
; Expected IR after resolution: the accumulator comes first, then A and B; the trailing constants
; are i32 10, i32 10, i32 8, then 2, 4 or 8, then i1 false.
; CHECK-GENISA-LABEL: @test_v13(
|
||||||
|
; CHECK-GENISA: [[DPAS1:%[A-z0-9]*]] = call float @llvm.genx.GenISA.sub.group.dpas.f32.f32.f32.v4i32(float %c2F, float %a2, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 2, i1 false)
|
||||||
|
; CHECK-GENISA: store float [[DPAS1]], float* %res2
|
||||||
|
; CHECK-GENISA: [[DPAS2:%[A-z0-9]*]] = call <2 x float> @llvm.genx.GenISA.sub.group.dpas.v2f32.v2f32.f32.v4i32(<2 x float> %c4F, float %a4, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 4, i1 false)
|
||||||
|
; CHECK-GENISA: store <2 x float> [[DPAS2]], <2 x float>* %res4
|
||||||
|
; CHECK-GENISA: [[DPAS3:%[A-z0-9]*]] = call <4 x float> @llvm.genx.GenISA.sub.group.dpas.v4f32.v4f32.v2f32.v4i32(<4 x float> %c8F, <2 x float> %a8, <4 x i32> %{{.*}}, i32 10, i32 10, i32 8, i32 8, i1 false)
|
||||||
|
; CHECK-GENISA: store <4 x float> [[DPAS3]], <4 x float>* %res8
|
||||||
|
|
||||||
|
; Expected vISA: one dpas.tf32.tf32.8.N instruction per call (N = 2, 4, 8) plus the .decl lines of
; its four operands. The -DAG directives allow these lines to match in any order.
; CHECK-VISAASM-LABEL: .kernel "test_v13"
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.2 (M1, 16) [[D2:[A-z0-9_]*]].0 [[C2:[A-z0-9_]*]].0 [[B2:[A-z0-9_]*]].0 [[A2:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C2]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B2]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A2_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A2_ALIAS]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.4 (M1, 16) [[D4:[A-z0-9_]*]].0 [[C4:[A-z0-9_]*]].0 [[B4:[A-z0-9_]*]].0 [[A4:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C4]] v_type=G type=f num_elts=64
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B4]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4]] v_type=G type=ud num_elts=32 align=wordx32 alias=<[[A4_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A4_ALIAS]] v_type=G type=f num_elts=32
|
||||||
|
; CHECK-VISAASM-DAG: dpas.tf32.tf32.8.8 (M1, 16) [[D8:[A-z0-9_]*]].0 [[C8:[A-z0-9_]*]].0 [[B8:[A-z0-9_]*]].0 [[A8:[A-z0-9_]*]](0,0)
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[D8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[C8]] v_type=G type=f num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[B8]] v_type=G type=d num_elts=128
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8]] v_type=G type=ud num_elts=64 align=wordx32 alias=<[[A8_ALIAS:[A-z0-9_]*]], 0>
|
||||||
|
; CHECK-VISAASM-DAG: .decl [[A8_ALIAS]] v_type=G type=f num_elts=64
|
||||||
|
|
||||||
|
; NOTE(review): the Itanium-mangled suffixes of the second and third callees (iDv2_fDv4_fS_i and
; iDv4_fS_S_i) decode to a float2 / float4 second parameter, whereas the calls pass float %a4 and
; <2 x float> %a8. Confirm the names are intentional and agree with the matching declarations.
%call33 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv4_ffi(i32 8, float %a2, <4 x float> %b, float %c2F, i32 768)
|
||||||
|
store float %call33, float* %res2
|
||||||
|
%call34 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv4_fS_i(i32 8, float %a4, <4 x float> %b, <2 x float> %c4F, i32 768)
|
||||||
|
store <2 x float> %call34, <2 x float>* %res4
|
||||||
|
%call35 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fS_S_i(i32 8, <2 x float> %a8, <4 x float> %b, <4 x float> %c8F, i32 768)
|
||||||
|
store <4 x float> %call35, <4 x float>* %res8
|
||||||
|
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Metadata node holding i32 32; test_v12 and test_v13 attach it as !intel_reqd_sub_group_size.
!100 = !{i32 32}
|
||||||
Reference in New Issue
Block a user