mirror of
https://github.com/intel/llvm.git
synced 2026-01-17 06:40:01 +08:00
[AMDGPU] Call the FINI_ARRAY destructors in the correct order (#71815)
Summary: The AMDGPU backend uses the linker-provided INIT_ARRAY and FINI_ARRAY sections to call all the global constructors in a single kernel. Previously this mistakenly used the same iteration logic for both arrays. The destructors stored in FINI_ARRAY are stored in the same order as the ones in the INIT_ARRAY section so we need to traverse it in reverse order.
This commit is contained in:
@@ -53,13 +53,22 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
|
||||
//
|
||||
// extern "C" void * __init_array_start[];
|
||||
// extern "C" void * __init_array_end[];
|
||||
// extern "C" void * __fini_array_start[];
|
||||
// extern "C" void * __fini_array_end[];
|
||||
//
|
||||
// using InitCallback = void();
|
||||
// using FiniCallback = void(void);
|
||||
//
|
||||
// void call_init_array_callbacks() {
|
||||
// for (auto start = __init_array_start; start != __init_array_end; ++start)
|
||||
// reinterpret_cast<InitCallback *>(*start)();
|
||||
// }
|
||||
//
|
||||
// void call_fini_array_callbacks() {
|
||||
// size_t fini_array_size = __fini_array_end - __fini_array_start;
|
||||
// for (size_t i = fini_array_size; i > 0; --i)
|
||||
// reinterpret_cast<FiniCallback *>(__fini_array_start[i - 1])();
|
||||
// }
|
||||
static void createInitOrFiniCalls(Function &F, bool IsCtor) {
|
||||
Module &M = *F.getParent();
|
||||
LLVMContext &C = M.getContext();
|
||||
@@ -96,15 +105,39 @@ static void createInitOrFiniCalls(Function &F, bool IsCtor) {
|
||||
// for now we just call them with no arguments.
|
||||
auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
|
||||
|
||||
IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
|
||||
Constant *Start = Begin;
|
||||
Constant *Stop = End;
|
||||
// The destructor array must be called in reverse order. Get a constant
|
||||
// expression to the end of the array and iterate backwards instead.
|
||||
if (!IsCtor) {
|
||||
Type *Int64Ty = IntegerType::getInt64Ty(C);
|
||||
auto *Offset = ConstantExpr::getSub(
|
||||
ConstantExpr::getAShr(
|
||||
ConstantExpr::getSub(ConstantExpr::getPtrToInt(End, Int64Ty),
|
||||
ConstantExpr::getPtrToInt(Begin, Int64Ty)),
|
||||
ConstantInt::get(Int64Ty, 3)),
|
||||
ConstantInt::get(Int64Ty, 1));
|
||||
Start = ConstantExpr::getGetElementPtr(
|
||||
ArrayType::get(IRB.getPtrTy(), 0), Begin,
|
||||
ArrayRef<Constant *>({ConstantInt::get(Int64Ty, 0), Offset}),
|
||||
/*InBounds=*/true);
|
||||
Stop = Begin;
|
||||
}
|
||||
|
||||
IRB.CreateCondBr(
|
||||
IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_NE : ICmpInst::ICMP_UGE, Start,
|
||||
Stop),
|
||||
LoopBB, ExitBB);
|
||||
IRB.SetInsertPoint(LoopBB);
|
||||
auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
|
||||
auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
|
||||
CallBackPHI, "callback");
|
||||
IRB.CreateCall(CallBackTy, CallBack);
|
||||
auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
|
||||
auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
|
||||
CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
|
||||
auto *NewCallBack =
|
||||
IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, IsCtor ? 1 : -1, "next");
|
||||
auto *EndCmp = IRB.CreateCmp(IsCtor ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_ULT,
|
||||
NewCallBack, Stop, "end");
|
||||
CallBackPHI->addIncoming(Start, &F.getEntryBlock());
|
||||
CallBackPHI->addIncoming(NewCallBack, LoopBB);
|
||||
IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
|
||||
IRB.SetInsertPoint(ExitBB);
|
||||
|
||||
@@ -25,8 +25,6 @@ define void @bar() addrspace(1) {
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
;.
|
||||
; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo.alias, ptr null }, { i32, ptr, ptr } { i32 1, ptr inttoptr (i64 4096 to ptr), ptr null }]
|
||||
; CHECK: @[[LLVM_GLOBAL_DTORS:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr addrspacecast (ptr addrspace(1) @bar to ptr), ptr null }]
|
||||
@@ -65,13 +63,13 @@ define void @bar() addrspace(1) {
|
||||
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
|
||||
; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK: while.entry:
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
|
||||
; CHECK-NEXT: call void [[CALLBACK]]()
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
|
||||
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
|
||||
; CHECK: while.end:
|
||||
; CHECK-NEXT: ret void
|
||||
|
||||
@@ -12,20 +12,19 @@
|
||||
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
|
||||
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.init
|
||||
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.init.kd
|
||||
; VISIBILITY: FUNC WEAK PROTECTED {{.*}} amdgcn.device.fini
|
||||
; VISIBILITY: OBJECT WEAK DEFAULT {{.*}} amdgcn.device.fini.kd
|
||||
|
||||
; SECTION: .init_array.1 INIT_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
|
||||
; SECTION: .fini_array.1 FINI_ARRAY {{.*}} {{.*}} 000008 00 WA 0 0 8
|
||||
|
||||
; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.init
|
||||
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.init.kd
|
||||
; DISABLED-NOT: FUNC GLOBAL PROTECTED {{.*}} amdgcn.device.fini
|
||||
; DISABLED-NOT: OBJECT GLOBAL DEFAULT {{.*}} amdgcn.device.fini.kd
|
||||
|
||||
; METADATA: amdhsa.kernels:
|
||||
; METADATA: .kind: init
|
||||
; METADATA: .max_flat_workgroup_size: 1
|
||||
@@ -53,13 +52,6 @@ define internal void @bar() {
|
||||
; CHECK: @[[__FINI_ARRAY_END:[a-zA-Z0-9_$"\\.-]+]] = external addrspace(1) constant [0 x ptr addrspace(1)]
|
||||
; CHECK: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending addrspace(1) global [2 x ptr] [ptr @amdgcn.device.init, ptr @amdgcn.device.fini], section "llvm.metadata"
|
||||
;.
|
||||
; CHECK-LABEL: define internal void @foo() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define internal void @bar() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
|
||||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
@@ -79,13 +71,13 @@ define internal void @bar() {
|
||||
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
|
||||
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK: while.entry:
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
|
||||
; CHECK-NEXT: call void [[CALLBACK]]()
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
|
||||
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
|
||||
; CHECK: while.end:
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -93,4 +85,3 @@ define internal void @bar() {
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,1" "device-init" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,1" "device-fini" }
|
||||
;.
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -filetype=obj -o - < %s | llvm-readelf -s - 2>&1 | FileCheck %s -check-prefix=CHECK-VIS
|
||||
|
||||
|
||||
; UTC_ARGS: --disable
|
||||
@llvm.global_ctors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }, { i32, ptr, ptr } { i32 1, ptr @foo.5, ptr null }]
|
||||
@llvm.global_dtors = appending addrspace(1) global [2 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }, { i32, ptr, ptr } { i32 1, ptr @bar.5, ptr null }]
|
||||
|
||||
; UTC_ARGS: --disable
|
||||
; CHECK: @__init_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
|
||||
; CHECK: @__init_array_end = external addrspace(1) constant [0 x ptr addrspace(1)]
|
||||
; CHECK: @__fini_array_start = external addrspace(1) constant [0 x ptr addrspace(1)]
|
||||
@@ -36,22 +36,6 @@ define internal void @bar.5() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: define internal void @foo() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define internal void @bar() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define internal void @foo.5() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define internal void @bar.5() {
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
;
|
||||
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.init(
|
||||
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
@@ -70,14 +54,13 @@ define internal void @bar.5() {
|
||||
; CHECK-LABEL: define weak_odr amdgpu_kernel void @amdgcn.device.fini(
|
||||
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 icmp ne (ptr addrspace(1) @__fini_array_start, ptr addrspace(1) @__fini_array_end), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK-NEXT: br i1 icmp uge (ptr addrspace(1) getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), ptr addrspace(1) @__fini_array_start), label [[WHILE_ENTRY:%.*]], label [[WHILE_END:%.*]]
|
||||
; CHECK: while.entry:
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ @__fini_array_start, [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ getelementptr inbounds ([0 x ptr], ptr addrspace(1) @__fini_array_start, i64 0, i64 sub (i64 ashr (i64 sub (i64 ptrtoint (ptr addrspace(1) @__fini_array_end to i64), i64 ptrtoint (ptr addrspace(1) @__fini_array_start to i64)), i64 3), i64 1)), [[ENTRY:%.*]] ], [ [[NEXT:%.*]], [[WHILE_ENTRY]] ]
|
||||
; CHECK-NEXT: [[CALLBACK:%.*]] = load ptr, ptr addrspace(1) [[PTR]], align 8
|
||||
; CHECK-NEXT: call void [[CALLBACK]]()
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp eq ptr addrspace(1) [[NEXT]], @__fini_array_end
|
||||
; CHECK-NEXT: [[NEXT]] = getelementptr ptr addrspace(1), ptr addrspace(1) [[PTR]], i64 -1
|
||||
; CHECK-NEXT: [[END:%.*]] = icmp ult ptr addrspace(1) [[NEXT]], @__fini_array_start
|
||||
; CHECK-NEXT: br i1 [[END]], label [[WHILE_END]], label [[WHILE_ENTRY]]
|
||||
; CHECK: while.end:
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
|
||||
Reference in New Issue
Block a user