[AMDGPU] Emit backend_stack_size PAL metadata (#72509)

For chain functions, PAL uses a `backend_stack_size` metadata item,
which at the moment has the same meaning as `stack_frame_size_in_bytes`.
We emit both for now in order to simplify coordination with PAL.

The new item must be emitted in the `shader_functions` section, just like
the metadata for other module entry functions. For simplicity, we mark
chain functions as module entry functions and emit the same metadata for
all of them.
This commit is contained in:
Diana
2023-11-20 10:01:13 +01:00
committed by GitHub
parent 7e65dc72c4
commit 61332cb047
4 changed files with 48 additions and 1 deletions

View File

@@ -1910,7 +1910,7 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) {
case CallingConv::AMDGPU_Gfx:
return true;
default:
return isEntryFunctionCC(CC);
return isEntryFunctionCC(CC) || isChainCC(CC);
}
}

View File

@@ -244,6 +244,7 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
// Record the scratch size Val for the function FnName in that function's
// shader-function metadata node. The same value is stored under both
// ".stack_frame_size_in_bytes" and ".backend_stack_size".
void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) {
  auto ShaderFn = getShaderFunction(FnName);
  // Build the value node once; both keys receive a copy of it.
  auto SizeNode = MsgPackDoc.getNode(Val);
  ShaderFn[".stack_frame_size_in_bytes"] = SizeNode;
  ShaderFn[".backend_stack_size"] = SizeNode;
}
// Set the amount of LDS used in bytes in the metadata.

View File

@@ -146,12 +146,14 @@ attributes #0 = { nounwind }
; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; SDAG-NEXT: .vgpr_count: 0x2{{$}}
; GISEL-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: dynamic_stack_loop:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; SDAG-NEXT: .sgpr_count: 0x25{{$}}
; GISEL-NEXT: .sgpr_count: 0x26{{$}}
@@ -159,71 +161,84 @@ attributes #0 = { nounwind }
; SDAG-NEXT: .vgpr_count: 0x3{{$}}
; GISEL-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: multiple_stack:
; GCN-NEXT: .backend_stack_size: 0x24{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack:
; GCN-NEXT: .backend_stack_size: 0{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: no_stack_call:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x3{{$}}
; GCN-NEXT: no_stack_extern_call:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: no_stack_extern_call_many_args:
; GCN-NEXT: .backend_stack_size: 0x90{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: no_stack_indirect_call:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_lds:
; GCN-NEXT: .backend_stack_size: 0{{$}}
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x20{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}}
; GCN-NEXT: .vgpr_count: 0x1{{$}}
; GCN-NEXT: simple_lds_recurse:
; GCN-NEXT: .backend_stack_size: 0x10{{$}}
; GCN-NEXT: .lds_size: 0x100{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}}
; GCN-NEXT: .vgpr_count: 0x29{{$}}
; GCN-NEXT: simple_stack:
; GCN-NEXT: .backend_stack_size: 0x14{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x21{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}}
; GCN-NEXT: .vgpr_count: 0x2{{$}}
; GCN-NEXT: simple_stack_call:
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x25{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x4{{$}}
; GCN-NEXT: simple_stack_extern_call:
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_stack_indirect_call:
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GFX8-NEXT: .sgpr_count: 0x28{{$}}
; GFX9-NEXT: .sgpr_count: 0x2c{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}
; GCN-NEXT: .vgpr_count: 0x2b{{$}}
; GCN-NEXT: simple_stack_recurse:
; GCN-NEXT: .backend_stack_size: 0x20{{$}}
; GCN-NEXT: .lds_size: 0{{$}}
; GCN-NEXT: .sgpr_count: 0x28{{$}}
; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}}

View File

@@ -0,0 +1,31 @@
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s
; CHECK-LABEL: {{^}}amdgpu_cs_chain_func:
; CHECK: .amdgpu_pal_metadata
; CHECK-NEXT: ---
; CHECK-NEXT: amdpal.pipelines:
; CHECK-NEXT: - .api: Vulkan
; CHECK: .shader_functions:
; CHECK-NEXT: amdgpu_cs_chain_func:
; CHECK: .backend_stack_size: 0x10{{$}}
; CHECK: .stack_frame_size_in_bytes: 0x10{{$}}
; CHECK:amdpal.version:
; CHECK-NEXT: - 0x3
; CHECK-NEXT: - 0
; CHECK-NEXT:...
; CHECK-NEXT: .end_amdgpu_pal_metadata
; Chain function under test: it takes a <40 x i32> argument, allocates a
; [3 x i32] object in addrspace(5), stores 42 to it, and passes both the
; argument and the alloca pointer to @use via an amdgpu_gfx call.
; NOTE(review): the parameter name suggests the wide vector argument is meant
; to force spilling; the checks above expect a stack size of 0x10 for this
; function -- confirm against the generated code if the body is changed.
define amdgpu_cs_chain void @amdgpu_cs_chain_func(<40 x i32> %should_spill) {
.entry:
%v = alloca [3 x i32], addrspace(5)
store i32 42, ptr addrspace(5) %v
call amdgpu_gfx void @use(<40 x i32> %should_spill, ptr addrspace(5) %v)
ret void
}
declare amdgpu_gfx void @use(...)
!amdgpu.pal.metadata.msgpack = !{!0}
!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"}
!1 = !{i32 7}