AMDGPU: Partially respect nobuiltin in libcall simplifier

There are more contexts where nobuiltin is not handled correctly, but
this is the simplest one.

https://reviews.llvm.org/D156682
This commit is contained in:
Matt Arsenault
2023-07-30 19:05:19 -04:00
parent 2dc1a27449
commit 8a677a7ff0
2 changed files with 14 additions and 2 deletions

View File

@@ -500,7 +500,7 @@ bool AMDGPULibCalls::sincosUseNative(CallInst *aCI, const FuncInfo &FInfo) {
bool AMDGPULibCalls::useNative(CallInst *aCI) {
CI = aCI;
Function *Callee = aCI->getCalledFunction();
if (!Callee)
if (!Callee || aCI->isNoBuiltin())
return false;
FuncInfo FInfo;
@@ -593,7 +593,7 @@ bool AMDGPULibCalls::fold(CallInst *CI, AliasAnalysis *AA) {
this->CI = CI;
Function *Callee = CI->getCalledFunction();
// Ignore indirect calls.
if (!Callee)
if (!Callee || CI->isNoBuiltin())
return false;
IRBuilder<> B(CI);

View File

@@ -630,6 +630,18 @@ entry:
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr_nobuiltin
; GCN: %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
; Test input: loads two consecutive floats starting at %a, passes them to
; @_Z4powrff, and stores the result back to %a. The call site carries the
; nobuiltin attribute; the check line preceding this function expects the
; call to @_Z4powrff to still be present in the output.
define amdgpu_kernel void @test_use_native_powr_nobuiltin(ptr addrspace(1) nocapture %a) {
entry:
%tmp = load float, ptr addrspace(1) %a, align 4
; Address of the second float (element index 1 relative to %a).
%arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
%tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; The nobuiltin attribute on this call site is the property under test.
%call = call fast float @_Z4powrff(float %tmp, float %tmp1) nobuiltin
store float %call, ptr addrspace(1) %a, align 4
ret void
}
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: call fast float @_Z11native_sqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) {