mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
[AMDGPU] Split dot2-insts feature
Split out some of the instructions predicated on the dot2-insts target feature into a new dot7-insts feature, in preparation for subtargets that have some but not all of these instructions. NFCI. Differential Revision: https://reviews.llvm.org/D98717
This commit is contained in:
@@ -193,13 +193,13 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
|
||||
// Deep learning builtins.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot2-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot7-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot7-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts")
|
||||
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts")
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10+ only builtins.
|
||||
|
||||
@@ -183,6 +183,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
|
||||
Features["dot2-insts"] = true;
|
||||
Features["dot5-insts"] = true;
|
||||
Features["dot6-insts"] = true;
|
||||
Features["dot7-insts"] = true;
|
||||
Features["dl-insts"] = true;
|
||||
Features["flat-address-space"] = true;
|
||||
Features["16-bit-insts"] = true;
|
||||
@@ -200,6 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
|
||||
Features["dot2-insts"] = true;
|
||||
Features["dot5-insts"] = true;
|
||||
Features["dot6-insts"] = true;
|
||||
Features["dot7-insts"] = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case GK_GFX1010:
|
||||
Features["dl-insts"] = true;
|
||||
@@ -227,6 +229,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
|
||||
Features["dl-insts"] = true;
|
||||
Features["dot1-insts"] = true;
|
||||
Features["dot2-insts"] = true;
|
||||
Features["dot7-insts"] = true;
|
||||
LLVM_FALLTHROUGH;
|
||||
case GK_GFX90C:
|
||||
case GK_GFX909:
|
||||
|
||||
@@ -50,17 +50,17 @@
|
||||
// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
|
||||
|
||||
kernel void test() {}
|
||||
|
||||
@@ -13,8 +13,8 @@ kernel void builtins_amdgcn_dl_insts_err(
|
||||
half2 v2hA, half2 v2hB, float fC,
|
||||
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
|
||||
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
|
||||
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
|
||||
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
|
||||
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
|
||||
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
|
||||
|
||||
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
|
||||
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
|
||||
@@ -25,12 +25,12 @@ kernel void builtins_amdgcn_dl_insts_err(
|
||||
siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
|
||||
siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
|
||||
|
||||
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
|
||||
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
|
||||
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
|
||||
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
|
||||
|
||||
siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
|
||||
siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
|
||||
|
||||
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
|
||||
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
|
||||
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
|
||||
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
|
||||
}
|
||||
|
||||
@@ -480,7 +480,7 @@ def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
|
||||
def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
|
||||
"HasDot2Insts",
|
||||
"true",
|
||||
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
|
||||
"Has v_dot2_i32_i16, v_dot2_u32_u16 instructions"
|
||||
>;
|
||||
|
||||
def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
|
||||
@@ -507,6 +507,12 @@ def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
|
||||
"Has v_dot4c_i32_i8 instruction"
|
||||
>;
|
||||
|
||||
// Subtarget feature "dot7-insts": sets the HasDot7Insts subtarget field to
// "true". Covers the v_dot2_f32_f16, v_dot4_u32_u8 and v_dot8_u32_u4
// instructions (see the description string below).
def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
|
||||
"HasDot7Insts",
|
||||
"true",
|
||||
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
|
||||
>;
|
||||
|
||||
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
|
||||
"HasMAIInsts",
|
||||
"true",
|
||||
@@ -902,6 +908,7 @@ def FeatureISAVersion9_0_6 : FeatureSet<
|
||||
FeatureDLInsts,
|
||||
FeatureDot1Insts,
|
||||
FeatureDot2Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureSupportsSRAMECC,
|
||||
FeatureImageGather4D16Bug]>;
|
||||
|
||||
@@ -920,6 +927,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
|
||||
FeatureDot4Insts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureMAIInsts,
|
||||
FeaturePkFmacF16Inst,
|
||||
FeatureAtomicFaddInsts,
|
||||
@@ -948,6 +956,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
|
||||
FeatureDot4Insts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
Feature64BitDPP,
|
||||
FeaturePackedFP32Ops,
|
||||
FeatureMAIInsts,
|
||||
@@ -1008,6 +1017,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
|
||||
FeatureDot2Insts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
@@ -1028,6 +1038,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
|
||||
FeatureDot2Insts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureScalarStores,
|
||||
@@ -1049,6 +1060,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
|
||||
FeatureDot2Insts,
|
||||
FeatureDot5Insts,
|
||||
FeatureDot6Insts,
|
||||
FeatureDot7Insts,
|
||||
FeatureNSAEncoding,
|
||||
FeatureWavefrontSize32,
|
||||
FeatureShaderCyclesRegister]>;
|
||||
@@ -1373,6 +1385,9 @@ def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
|
||||
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureDot6Insts)>;
|
||||
|
||||
// Predicate for definitions gated on dot7-insts: checks
// Subtarget->hasDot7Insts() and, as an AssemblerPredicate, requires
// FeatureDot7Insts.
def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
|
||||
AssemblerPredicate<(all_of FeatureDot7Insts)>;
|
||||
|
||||
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
|
||||
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
|
||||
|
||||
|
||||
@@ -267,6 +267,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
|
||||
HasDot4Insts(false),
|
||||
HasDot5Insts(false),
|
||||
HasDot6Insts(false),
|
||||
HasDot7Insts(false),
|
||||
HasMAIInsts(false),
|
||||
HasPkFmacF16Inst(false),
|
||||
HasAtomicFaddInsts(false),
|
||||
|
||||
@@ -150,6 +150,7 @@ protected:
|
||||
bool HasDot4Insts;
|
||||
bool HasDot5Insts;
|
||||
bool HasDot6Insts;
|
||||
bool HasDot7Insts;
|
||||
bool HasMAIInsts;
|
||||
bool HasPkFmacF16Inst;
|
||||
bool HasAtomicFaddInsts;
|
||||
@@ -687,6 +688,10 @@ public:
|
||||
return HasDot6Insts;
|
||||
}
|
||||
|
||||
// Returns the value of the HasDot7Insts member.
bool hasDot7Insts() const {
|
||||
return HasDot7Insts;
|
||||
}
|
||||
|
||||
bool hasMAIInsts() const {
|
||||
return HasMAIInsts;
|
||||
}
|
||||
|
||||
@@ -10486,7 +10486,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
|
||||
EVT VT = N->getValueType(0);
|
||||
SDLoc SL(N);
|
||||
|
||||
if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
|
||||
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
|
||||
return SDValue();
|
||||
|
||||
// FMA((F32)S0.x, (F32)S1.x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->
|
||||
|
||||
@@ -287,19 +287,24 @@ class SDot2Pat<Instruction Inst> : GCNPat <
|
||||
let IsDOT = 1 in {
|
||||
let SubtargetPredicate = HasDot2Insts in {
|
||||
|
||||
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
|
||||
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
|
||||
AMDGPUfdot2, 1/*ExplicitClamp*/>;
|
||||
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
|
||||
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
|
||||
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
|
||||
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
|
||||
let SubtargetPredicate = HasDot7Insts in {
|
||||
|
||||
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
|
||||
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
|
||||
AMDGPUfdot2, 1/*ExplicitClamp*/>;
|
||||
def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
|
||||
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
|
||||
def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
|
||||
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
} // End SubtargetPredicate = HasDot7Insts
|
||||
|
||||
let SubtargetPredicate = HasDot1Insts in {
|
||||
|
||||
@@ -564,13 +569,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
|
||||
|
||||
let SubtargetPredicate = HasDot2Insts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
|
||||
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
|
||||
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
|
||||
let SubtargetPredicate = HasDot7Insts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
|
||||
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
|
||||
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
} // End SubtargetPredicate = HasDot7Insts
|
||||
|
||||
let SubtargetPredicate = HasDot1Insts in {
|
||||
|
||||
@@ -657,13 +667,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
|
||||
|
||||
let SubtargetPredicate = HasDot2Insts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
|
||||
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
|
||||
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
|
||||
let SubtargetPredicate = HasDot7Insts in {
|
||||
|
||||
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
|
||||
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
|
||||
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
|
||||
|
||||
} // End SubtargetPredicate = HasDot2Insts
|
||||
} // End SubtargetPredicate = HasDot7Insts
|
||||
|
||||
let SubtargetPredicate = HasDot1Insts in {
|
||||
|
||||
|
||||
Reference in New Issue
Block a user