From 61332cb047faca2dffce9a0ae68bf0d1c0cdee39 Mon Sep 17 00:00:00 2001 From: Diana Date: Mon, 20 Nov 2023 10:01:13 +0100 Subject: [PATCH] [AMDGPU] Emit backend_stack_size PAL metadata (#72509) For chain functions, PAL uses a `backend_stack_size` metadata item, which at the moment has the same meaning as `stack_frame_size_in_bytes`. We emit both for now in order to simplify coordination with PAL. The new item must be emitted in the `shader_functions` section, just as the metadata for other module entry functions. For simplicity, we mark chain functions as module entry functions and emit the same metadata for all of them. --- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +- .../Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 1 + llvm/test/CodeGen/AMDGPU/amdpal-callable.ll | 15 +++++++++ .../CodeGen/AMDGPU/amdpal-chain-metadata.ll | 31 +++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/amdpal-chain-metadata.ll diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index a09abc639d75..fdc59281c50d 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1910,7 +1910,7 @@ bool isModuleEntryFunctionCC(CallingConv::ID CC) { case CallingConv::AMDGPU_Gfx: return true; default: - return isEntryFunctionCC(CC); + return isEntryFunctionCC(CC) || isChainCC(CC); } } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp index 278341944c52..48d0bde139d7 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp @@ -244,6 +244,7 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) { void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) { auto Node = getShaderFunction(FnName); Node[".stack_frame_size_in_bytes"] = MsgPackDoc.getNode(Val); + Node[".backend_stack_size"] = MsgPackDoc.getNode(Val); } // Set the amount of LDS used in bytes in the metadata. diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll index 0061c0a3118a..b7b2cb22c1b6 100644 --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -146,12 +146,14 @@ attributes #0 = { nounwind } ; GCN-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; SDAG-NEXT: .vgpr_count: 0x2{{$}} ; GISEL-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: dynamic_stack_loop: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; SDAG-NEXT: .sgpr_count: 0x25{{$}} ; GISEL-NEXT: .sgpr_count: 0x26{{$}} @@ -159,71 +161,84 @@ attributes #0 = { nounwind } ; SDAG-NEXT: .vgpr_count: 0x3{{$}} ; GISEL-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: multiple_stack: +; GCN-NEXT: .backend_stack_size: 0x24{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x24{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack: +; GCN-NEXT: .backend_stack_size: 0{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: no_stack_call: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x25{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x3{{$}} ; GCN-NEXT: no_stack_extern_call: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: no_stack_extern_call_many_args: +; GCN-NEXT: .backend_stack_size: 0x90{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x90{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: no_stack_indirect_call: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_lds: +; GCN-NEXT: .backend_stack_size: 0{{$}} ; GCN-NEXT: .lds_size: 0x100{{$}} ; GCN-NEXT: .sgpr_count: 0x20{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0{{$}} ; GCN-NEXT: .vgpr_count: 0x1{{$}} ; GCN-NEXT: simple_lds_recurse: +; GCN-NEXT: .backend_stack_size: 0x10{{$}} ; GCN-NEXT: .lds_size: 0x100{{$}} ; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x10{{$}} ; GCN-NEXT: .vgpr_count: 0x29{{$}} ; GCN-NEXT: simple_stack: +; GCN-NEXT: .backend_stack_size: 0x14{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x21{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x14{{$}} ; GCN-NEXT: .vgpr_count: 0x2{{$}} ; GCN-NEXT: simple_stack_call: +; GCN-NEXT: .backend_stack_size: 0x20{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x25{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x4{{$}} ; GCN-NEXT: simple_stack_extern_call: +; GCN-NEXT: .backend_stack_size: 0x20{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_stack_indirect_call: +; GCN-NEXT: .backend_stack_size: 0x20{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GFX8-NEXT: .sgpr_count: 0x28{{$}} ; GFX9-NEXT: .sgpr_count: 0x2c{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} ; GCN-NEXT: .vgpr_count: 0x2b{{$}} ; GCN-NEXT: simple_stack_recurse: +; GCN-NEXT: .backend_stack_size: 0x20{{$}} ; GCN-NEXT: .lds_size: 0{{$}} ; GCN-NEXT: .sgpr_count: 0x28{{$}} ; GCN-NEXT: .stack_frame_size_in_bytes: 0x20{{$}} diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-chain-metadata.ll b/llvm/test/CodeGen/AMDGPU/amdpal-chain-metadata.ll new file mode 100644 index 000000000000..044abe15c5d8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdpal-chain-metadata.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 <%s | FileCheck %s + +; CHECK-LABEL: {{^}}amdgpu_cs_chain_func: +; CHECK: .amdgpu_pal_metadata +; CHECK-NEXT: --- +; CHECK-NEXT: amdpal.pipelines: +; CHECK-NEXT: - .api: Vulkan +; CHECK: .shader_functions: +; CHECK-NEXT: amdgpu_cs_chain_func: +; CHECK: .backend_stack_size: 0x10{{$}} +; CHECK: .stack_frame_size_in_bytes: 0x10{{$}} +; CHECK:amdpal.version: +; CHECK-NEXT: - 0x3 +; CHECK-NEXT: - 0 +; CHECK-NEXT:... +; CHECK-NEXT: .end_amdgpu_pal_metadata + +define amdgpu_cs_chain void @amdgpu_cs_chain_func(<40 x i32> %should_spill) { +.entry: + %v = alloca [3 x i32], addrspace(5) + store i32 42, ptr addrspace(5) %v + call amdgpu_gfx void @use(<40 x i32> %should_spill, ptr addrspace(5) %v) + ret void +} + +declare amdgpu_gfx void @use(...) + +!amdgpu.pal.metadata.msgpack = !{!0} + +!0 = !{!"\82\B0amdpal.pipelines\91\8A\A4.api\A6Vulkan\B2.compute_registers\85\AB.tg_size_en\C3\AA.tgid_x_en\C2\AA.tgid_y_en\C2\AA.tgid_z_en\C2\AF.tidig_comp_cnt\01\B0.hardware_stages\81\A3.cs\8C\AF.checksum_value\CE\94D\D7\D0\AB.debug_mode\00\AB.float_mode\CC\C0\A9.image_op\C2\AC.mem_ordered\C3\AB.sgpr_limitj\B7.threadgroup_dimensions\93\01\CD\04\00\01\AD.trap_present\00\B2.user_data_reg_map\DC\00 \CE\10\00\00\00\CE\FF\FF\FF\FF\00\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\CE\FF\FF\FF\FF\AB.user_sgprs\03\AB.vgpr_limit\CD\01\00\AF.wavefront_size@\B7.internal_pipeline_hash\92\CF\E7\10k\A6:\A6%\F7\CF\B2\1F\1A\D4{\DA\E1T\AA.registers\80\A8.shaders\81\A8.compute\82\B0.api_shader_hash\92\CF\E9Zn7}\1E\B9\E7\00\B1.hardware_mapping\91\A3.cs\B0.spill_threshold\CE\FF\FF\FF\FF\A5.type\A2Cs\B0.user_data_limit\01\AF.xgl_cache_info\82\B3.128_bit_cache_hash\92\CF\B4X\B8\11[\A4\88P\CF\A0;\B0\AF\FF\B4\BE\C0\AD.llpc_version\A461.1\AEamdpal.version\92\03\00"} +!1 = !{i32 7}