mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
[AMDGPU] Don't transform illegal intrinsics to V_ILLEGAL

This reverts parts of D123693. The functionality of allowing unsupported intrinsics to select has been superseded by D139000 "Remove function with incompatible features".

Retain assembler/disassembler support for v_illegal on GFX10+ only, where it is documented.

Differential Revision: https://reviews.llvm.org/D148127
This commit is contained in:
@@ -6749,7 +6749,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
|
||||
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
|
||||
NumVDataDwords, NumVAddrDwords);
|
||||
if (Opcode == -1)
|
||||
return makeV_ILLEGAL(Op, DAG);
|
||||
report_fatal_error(
|
||||
"requested image instruction is not supported on this GPU");
|
||||
}
|
||||
if (Opcode == -1 &&
|
||||
Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
|
||||
@@ -7911,11 +7912,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
||||
DAG.setNodeMemRefs(NewNode, {MemRef});
|
||||
return SDValue(NewNode, 0);
|
||||
}
|
||||
case Intrinsic::amdgcn_global_atomic_fadd: {
|
||||
if (!Subtarget->hasAtomicFaddNoRtnInsts())
|
||||
return makeV_ILLEGAL(Op, DAG);
|
||||
return SDValue();
|
||||
}
|
||||
case Intrinsic::amdgcn_global_atomic_fmin:
|
||||
case Intrinsic::amdgcn_global_atomic_fmax:
|
||||
case Intrinsic::amdgcn_flat_atomic_fmin:
|
||||
@@ -8508,27 +8504,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
}
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const {
|
||||
// Create the V_ILLEGAL node.
|
||||
SDLoc DL(Op);
|
||||
auto Opcode = Subtarget->getGeneration() < AMDGPUSubtarget::GFX10 ?
|
||||
AMDGPU::V_ILLEGAL_gfx6_gfx7_gfx8_gfx9 : AMDGPU::V_ILLEGAL;
|
||||
auto EntryNode = DAG.getEntryNode();
|
||||
auto IllegalNode = DAG.getMachineNode(Opcode, DL, MVT::Other, EntryNode);
|
||||
auto IllegalVal = SDValue(IllegalNode, 0u);
|
||||
|
||||
// Add the V_ILLEGAL node to the root chain to prevent its removal.
|
||||
auto Chains = SmallVector<SDValue, 2u>();
|
||||
Chains.push_back(IllegalVal);
|
||||
Chains.push_back(DAG.getRoot());
|
||||
auto Root = DAG.getTokenFactor(SDLoc(Chains.back()), Chains);
|
||||
DAG.setRoot(Root);
|
||||
|
||||
// Merge with UNDEF to satisfy return value requirements.
|
||||
auto UndefVal = DAG.getUNDEF(Op.getValueType());
|
||||
return DAG.getMergeValues({UndefVal, IllegalVal}, DL);
|
||||
}
|
||||
|
||||
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
|
||||
// offset (the offset that is included in bounds checking and swizzling, to be
|
||||
// split between the instruction's voffset and immoffset fields) and soffset
|
||||
|
||||
@@ -87,8 +87,6 @@ private:
|
||||
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
SDValue makeV_ILLEGAL(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
// The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
|
||||
// (the offset that is included in bounds checking and swizzling, to be split
|
||||
// between the instruction's voffset and immoffset fields) and soffset (the
|
||||
|
||||
@@ -3638,15 +3638,6 @@ def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
|
||||
// Dummy Instructions
|
||||
//============================================================================//
|
||||
|
||||
def V_ILLEGAL_gfx6_gfx7_gfx8_gfx9 : Enc32, InstSI<(outs), (ins), "v_illegal"> {
|
||||
let Inst{31-0} = 0xFFFFFFFF;
|
||||
let FixedSize = 1;
|
||||
let Size = 4;
|
||||
let Uses = [EXEC];
|
||||
let hasSideEffects = 1;
|
||||
let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
|
||||
}
|
||||
|
||||
def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
|
||||
let Inst{31-0} = 0x00000000;
|
||||
let FixedSize = 1;
|
||||
|
||||
28
llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
Normal file
28
llvm/test/CodeGen/AMDGPU/unsupported-atomics.ll
Normal file
@@ -0,0 +1,28 @@
|
||||
; RUN: not --crash llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX906 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
|
||||
; RUN: not --crash llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX1030 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
|
||||
|
||||
; GFX906: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.atomic.fadd
|
||||
|
||||
; GFX908-LABEL: fadd_test:
|
||||
; GFX908: global_atomic_add_f32
|
||||
|
||||
; GFX90A-LABEL: fadd_test:
|
||||
; GFX90A: global_atomic_add_f32
|
||||
|
||||
; GFX940-LABEL: fadd_test:
|
||||
; GFX940: global_atomic_add_f32
|
||||
|
||||
; GFX1030: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.global.atomic.fadd
|
||||
|
||||
; GFX1100-LABEL: fadd_test:
|
||||
; GFX1100: global_atomic_add_f32
|
||||
|
||||
define fastcc void @fadd_test(ptr addrspace(1) nocapture noundef %0, float noundef %1) unnamed_addr {
|
||||
%3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) noundef %0, float noundef %1)
|
||||
ret void
|
||||
}
|
||||
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float)
|
||||
@@ -1,32 +1,24 @@
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940 %s
|
||||
; RUN: not --crash llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
|
||||
; RUN: not --crash llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
|
||||
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
|
||||
|
||||
; GFX906-LABEL: image_sample_test:
|
||||
; GFX906-NOT: v_illegal
|
||||
; GFX906: image_sample_lz
|
||||
|
||||
; GFX908-LABEL: image_sample_test:
|
||||
; GFX908-NOT: v_illegal
|
||||
; GFX908: image_sample_lz
|
||||
|
||||
; GFX90A-LABEL: image_sample_test:
|
||||
; GFX90A-NOT: image_sample_lz
|
||||
; GFX90A: v_illegal
|
||||
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
|
||||
|
||||
; GFX940-LABEL: image_sample_test:
|
||||
; GFX940-NOT: image_sample_lz
|
||||
; GFX940: v_illegal
|
||||
; GFX940: LLVM ERROR: requested image instruction is not supported on this GPU
|
||||
|
||||
; GFX1030-LABEL: image_sample_test:
|
||||
; GFX1030-NOT: v_illegal
|
||||
; GFX1030: image_sample_lz
|
||||
|
||||
; GFX1100-LABEL: image_sample_test:
|
||||
; GFX1100-NOT: v_illegal
|
||||
; GFX1100: image_sample_lz
|
||||
|
||||
define amdgpu_kernel void @image_sample_test(ptr addrspace(1) %out, float %arg1, float %arg2, <8 x i32> %arg3, <4 x i32> %arg4) {
|
||||
@@ -1,63 +0,0 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX940-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030-ASM %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100-ASM %s
|
||||
|
||||
; GFX906-ASM-LABEL: fadd_test:
|
||||
; GFX906-ASM-NOT: global_atomic_add_f32
|
||||
; GFX906-ASM: v_illegal
|
||||
|
||||
; GFX908-ASM-LABEL: fadd_test:
|
||||
; GFX908-ASM-NOT: v_illegal
|
||||
; GFX908-ASM: global_atomic_add_f32
|
||||
|
||||
; GFX90A-ASM-LABEL: fadd_test:
|
||||
; GFX90A-ASM-NOT: v_illegal
|
||||
; GFX90A-ASM: global_atomic_add_f32
|
||||
|
||||
; GFX940-ASM-LABEL: fadd_test:
|
||||
; GFX940-ASM-NOT: v_illegal
|
||||
; GFX940-ASM: global_atomic_add_f32
|
||||
|
||||
; GFX1030-ASM-LABEL: fadd_test:
|
||||
; GFX1030-ASM-NOT: global_atomic_add_f32
|
||||
; GFX1030-ASM: v_illegal
|
||||
|
||||
; GFX1100-ASM-LABEL: fadd_test:
|
||||
; GFX1100-ASM-NOT: v_illegal
|
||||
; GFX1100-ASM: global_atomic_add_f32
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx906 -d - | FileCheck --check-prefix=GFX906-OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx908 -d - | FileCheck --check-prefix=GFX908-OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx90a -d - | FileCheck --check-prefix=GFX90A-OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx940 -d - | FileCheck --check-prefix=GFX940-OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1030 -d - | FileCheck --check-prefix=GFX1030-OBJ %s
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj -verify-machineinstrs < %s | llvm-objdump --triple=amdgcn--amdhsa --mcpu=gfx1100 -d - | FileCheck --check-prefix=GFX1100-OBJ %s
|
||||
|
||||
; GFX906-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX906-OBJ-NEXT: v_illegal // 000000000004: FFFFFFFF
|
||||
|
||||
; GFX908-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX908-OBJ-NEXT: global_atomic_add_f32
|
||||
|
||||
; GFX90A-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX90A-OBJ-NEXT: global_atomic_add_f32
|
||||
|
||||
; GFX940-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX940-OBJ-NEXT: global_atomic_add_f32
|
||||
|
||||
; GFX1030-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1030-OBJ-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1030-OBJ-NEXT: v_illegal // 000000000008: 00000000
|
||||
|
||||
; GFX1100-OBJ: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX1100-OBJ-NEXT: s_waitcnt_vscnt null, 0x0
|
||||
; GFX1100-OBJ-NEXT: global_atomic_add_f32 v[0:1], v2, off
|
||||
|
||||
define fastcc void @fadd_test(ptr addrspace(1) nocapture noundef %0, float noundef %1) unnamed_addr {
|
||||
%3 = tail call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) noundef %0, float noundef %1)
|
||||
ret void
|
||||
}
|
||||
declare float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) nocapture, float)
|
||||
@@ -1,14 +1,6 @@
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx906 -show-encoding %s | FileCheck --check-prefix=GFX906 %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx908 -show-encoding %s | FileCheck --check-prefix=GFX908 %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx90a -show-encoding %s | FileCheck --check-prefix=GFX90A %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1030 -show-encoding %s | FileCheck --check-prefix=GFX1030 %s
|
||||
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1100 -show-encoding %s | FileCheck --check-prefix=GFX1100 %s
|
||||
|
||||
v_illegal
|
||||
// GFX906: encoding: [0xff,0xff,0xff,0xff]
|
||||
// GFX908: encoding: [0xff,0xff,0xff,0xff]
|
||||
// GFX90A: encoding: [0xff,0xff,0xff,0xff]
|
||||
// GFX940: encoding: [0xff,0xff,0xff,0xff]
|
||||
// GFX1030: encoding: [0x00,0x00,0x00,0x00]
|
||||
// GFX1100: encoding: [0x00,0x00,0x00,0x00]
|
||||
|
||||
Reference in New Issue
Block a user