[AMDGPU] Split dot2-insts feature

Split out some of the instructions predicated on the dot2-insts target
feature into a new dot7-insts, in preparation for subtargets that have
some but not all of these instructions. NFCI.

Differential Revision: https://reviews.llvm.org/D98717
This commit is contained in:
Jay Foad
2021-03-16 16:01:03 +00:00
parent 6718ce4037
commit 967b64beb4
9 changed files with 67 additions and 28 deletions

View File

@@ -193,13 +193,13 @@ TARGET_BUILTIN(__builtin_amdgcn_fmed3h, "hhhh", "nc", "gfx9-insts")
// Deep learning builtins.
//===----------------------------------------------------------------------===//
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot2-insts")
TARGET_BUILTIN(__builtin_amdgcn_fdot2, "fV2hV2hfIb", "nc", "dot7-insts")
TARGET_BUILTIN(__builtin_amdgcn_sdot2, "SiV2SsV2SsSiIb", "nc", "dot2-insts")
TARGET_BUILTIN(__builtin_amdgcn_udot2, "UiV2UsV2UsUiIb", "nc", "dot2-insts")
TARGET_BUILTIN(__builtin_amdgcn_sdot4, "SiSiSiSiIb", "nc", "dot1-insts")
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot2-insts")
TARGET_BUILTIN(__builtin_amdgcn_udot4, "UiUiUiUiIb", "nc", "dot7-insts")
TARGET_BUILTIN(__builtin_amdgcn_sdot8, "SiSiSiSiIb", "nc", "dot1-insts")
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot2-insts")
TARGET_BUILTIN(__builtin_amdgcn_udot8, "UiUiUiUiIb", "nc", "dot7-insts")
//===----------------------------------------------------------------------===//
// GFX10+ only builtins.

View File

@@ -183,6 +183,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["dot2-insts"] = true;
Features["dot5-insts"] = true;
Features["dot6-insts"] = true;
Features["dot7-insts"] = true;
Features["dl-insts"] = true;
Features["flat-address-space"] = true;
Features["16-bit-insts"] = true;
@@ -200,6 +201,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["dot2-insts"] = true;
Features["dot5-insts"] = true;
Features["dot6-insts"] = true;
Features["dot7-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX1010:
Features["dl-insts"] = true;
@@ -227,6 +229,7 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["dl-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
Features["dot7-insts"] = true;
LLVM_FALLTHROUGH;
case GK_GFX90C:
case GK_GFX909:

View File

@@ -50,17 +50,17 @@
// GFX900: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX902: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX904: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX906: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX908: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1030: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1031: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
kernel void test() {}

View File

@@ -13,8 +13,8 @@ kernel void builtins_amdgcn_dl_insts_err(
half2 v2hA, half2 v2hB, float fC,
short2 v2ssA, short2 v2ssB, int siA, int siB, int siC,
ushort2 v2usA, ushort2 v2usB, uint uiA, uint uiB, uint uiC) {
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot2-insts}}
fOut[0] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, false); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
fOut[1] = __builtin_amdgcn_fdot2(v2hA, v2hB, fC, true); // expected-error {{'__builtin_amdgcn_fdot2' needs target feature dot7-insts}}
siOut[0] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, false); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
siOut[1] = __builtin_amdgcn_sdot2(v2ssA, v2ssB, siC, true); // expected-error {{'__builtin_amdgcn_sdot2' needs target feature dot2-insts}}
@@ -25,12 +25,12 @@ kernel void builtins_amdgcn_dl_insts_err(
siOut[2] = __builtin_amdgcn_sdot4(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
siOut[3] = __builtin_amdgcn_sdot4(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot4' needs target feature dot1-insts}}
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot2-insts}}
uiOut[2] = __builtin_amdgcn_udot4(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
uiOut[3] = __builtin_amdgcn_udot4(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot4' needs target feature dot7-insts}}
siOut[4] = __builtin_amdgcn_sdot8(siA, siB, siC, false); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
siOut[5] = __builtin_amdgcn_sdot8(siA, siB, siC, true); // expected-error {{'__builtin_amdgcn_sdot8' needs target feature dot1-insts}}
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot2-insts}}
uiOut[4] = __builtin_amdgcn_udot8(uiA, uiB, uiC, false); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
uiOut[5] = __builtin_amdgcn_udot8(uiA, uiB, uiC, true); // expected-error {{'__builtin_amdgcn_udot8' needs target feature dot7-insts}}
}

View File

@@ -480,7 +480,7 @@ def FeatureDot1Insts : SubtargetFeature<"dot1-insts",
def FeatureDot2Insts : SubtargetFeature<"dot2-insts",
"HasDot2Insts",
"true",
"Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
"Has v_dot2_i32_i16, v_dot2_u32_u16 instructions"
>;
def FeatureDot3Insts : SubtargetFeature<"dot3-insts",
@@ -507,6 +507,12 @@ def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
"Has v_dot4c_i32_i8 instruction"
>;
def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
"HasDot7Insts",
"true",
"Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"HasMAIInsts",
"true",
@@ -902,6 +908,7 @@ def FeatureISAVersion9_0_6 : FeatureSet<
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureSupportsSRAMECC,
FeatureImageGather4D16Bug]>;
@@ -920,6 +927,7 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureDot4Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
FeatureMAIInsts,
FeaturePkFmacF16Inst,
FeatureAtomicFaddInsts,
@@ -948,6 +956,7 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureDot4Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
@@ -1008,6 +1017,7 @@ def FeatureISAVersion10_1_1 : FeatureSet<
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
FeatureNSAEncoding,
FeatureWavefrontSize32,
FeatureScalarStores,
@@ -1028,6 +1038,7 @@ def FeatureISAVersion10_1_2 : FeatureSet<
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
FeatureNSAEncoding,
FeatureWavefrontSize32,
FeatureScalarStores,
@@ -1049,6 +1060,7 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
FeatureNSAEncoding,
FeatureWavefrontSize32,
FeatureShaderCyclesRegister]>;
@@ -1373,6 +1385,9 @@ def HasDot5Insts : Predicate<"Subtarget->hasDot5Insts()">,
def HasDot6Insts : Predicate<"Subtarget->hasDot6Insts()">,
AssemblerPredicate<(all_of FeatureDot6Insts)>;
def HasDot7Insts : Predicate<"Subtarget->hasDot7Insts()">,
AssemblerPredicate<(all_of FeatureDot7Insts)>;
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;

View File

@@ -267,6 +267,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
HasDot4Insts(false),
HasDot5Insts(false),
HasDot6Insts(false),
HasDot7Insts(false),
HasMAIInsts(false),
HasPkFmacF16Inst(false),
HasAtomicFaddInsts(false),

View File

@@ -150,6 +150,7 @@ protected:
bool HasDot4Insts;
bool HasDot5Insts;
bool HasDot6Insts;
bool HasDot7Insts;
bool HasMAIInsts;
bool HasPkFmacF16Inst;
bool HasAtomicFaddInsts;
@@ -687,6 +688,10 @@ public:
return HasDot6Insts;
}
bool hasDot7Insts() const {
return HasDot7Insts;
}
bool hasMAIInsts() const {
return HasMAIInsts;
}

View File

@@ -10486,7 +10486,7 @@ SDValue SITargetLowering::performFMACombine(SDNode *N,
EVT VT = N->getValueType(0);
SDLoc SL(N);
if (!Subtarget->hasDot2Insts() || VT != MVT::f32)
if (!Subtarget->hasDot7Insts() || VT != MVT::f32)
return SDValue();
// FMA((F32)S0.x, (F32)S1. x, FMA((F32)S0.y, (F32)S1.y, (F32)z)) ->

View File

@@ -287,19 +287,24 @@ class SDot2Pat<Instruction Inst> : GCNPat <
let IsDOT = 1 in {
let SubtargetPredicate = HasDot2Insts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;
def V_DOT2_I32_I16 : VOP3PInst<"v_dot2_i32_i16",
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_sdot2, 1>;
def V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
VOP3_Profile<VOP_I32_V2I16_V2I16_I32>, int_amdgcn_udot2, 1>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot7Insts in {
def V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
VOP3_Profile<VOP_F32_V2F16_V2F16_F32>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;
def V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
def V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
VOP3_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot8, 1>;
} // End SubtargetPredicate = HasDot2Insts
} // End SubtargetPredicate = HasDot7Insts
let SubtargetPredicate = HasDot1Insts in {
@@ -564,13 +569,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot7Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
} // End SubtargetPredicate = HasDot2Insts
} // End SubtargetPredicate = HasDot7Insts
let SubtargetPredicate = HasDot1Insts in {
@@ -657,13 +667,18 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10<0x22>;
let SubtargetPredicate = HasDot2Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
} // End SubtargetPredicate = HasDot2Insts
let SubtargetPredicate = HasDot7Insts in {
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10 <0x13>;
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10 <0x17>;
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10 <0x19>;
} // End SubtargetPredicate = HasDot2Insts
} // End SubtargetPredicate = HasDot7Insts
let SubtargetPredicate = HasDot1Insts in {