mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 21:53:12 +08:00
AMDGPU: Use 128-bit DS operations by default
This commit is contained in:
committed by
Matt Arsenault
parent
192cccb152
commit
f68cc2a7ed
@@ -78,7 +78,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
|
||||
// unset everything else if it is disabled
|
||||
|
||||
// Assuming ECC is enabled is the conservative default.
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
|
||||
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
|
||||
|
||||
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
|
||||
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -309,8 +309,8 @@ define amdgpu_kernel void @concat_vector_crash2(<8 x i8> addrspace(1)* %out, i32
|
||||
|
||||
; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16:
|
||||
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
|
||||
; VI: ds_write_b64
|
||||
; VI: ds_write2_b64
|
||||
; VI: ds_write_b128
|
||||
; VI: ds_write_b128
|
||||
define amdgpu_kernel void @build_vector_splat_concat_v8i16() {
|
||||
entry:
|
||||
store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* undef, align 16
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt,-enable-ds128 < %s | FileCheck -check-prefixes=CI,NODS128 %s
|
||||
|
||||
@lds = addrspace(3) global [512 x float] undef, align 4
|
||||
@lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4
|
||||
|
||||
@@ -49,8 +49,8 @@ define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalia
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; CI-PROMOTE: ds_write2_b64
|
||||
; CI-PROMOTE: ds_read2_b64
|
||||
; CI-PROMOTE: ds_write_b128
|
||||
; CI-PROMOTE: ds_read_b128
|
||||
define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
|
||||
%array = alloca [4 x <2 x double>], align 16, addrspace(5)
|
||||
@@ -107,8 +107,8 @@ define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias %
|
||||
; SI-PROMOTE: ds_write_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; SI-PROMOTE: ds_read_b64
|
||||
; CI-PROMOTE: ds_write2_b64
|
||||
; CI-PROMOTE: ds_read2_b64
|
||||
; CI-PROMOTE: ds_write_b128
|
||||
; CI-PROMOTE: ds_read_b128
|
||||
define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
|
||||
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
|
||||
%array = alloca [4 x <2 x i64>], align 16, addrspace(5)
|
||||
|
||||
@@ -18,9 +18,9 @@ define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %
|
||||
|
||||
; GCN-LABEL: store_v5i32:
|
||||
; GCN: ds_read_b32
|
||||
; GCN: ds_read2_b64
|
||||
; GCN: ds_read_b128
|
||||
; GCN: ds_write_b32
|
||||
; GCN: ds_write2_b64
|
||||
; GCN: ds_write_b128
|
||||
; GCN: ScratchSize: 0
|
||||
define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %a) nounwind {
|
||||
%val = load <5 x i32>, <5 x i32> addrspace(3)* %out
|
||||
@@ -28,5 +28,3 @@ define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %
|
||||
store <5 x i32> %val.1, <5 x i32> addrspace(3)* %out, align 16
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
|
||||
; Testing for ds_read_b128
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; Testing for ds_read/write_b128
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI,CIPLUS %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI,CIPLUS %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,CIPLUS %s
|
||||
|
||||
; GCN-LABEL: {{^}}local_i32_load
|
||||
; SICIVI: s_mov_b32 m0
|
||||
@@ -165,7 +165,8 @@ define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) n
|
||||
; GFX9-NOT: m0
|
||||
|
||||
; GCN-NOT: add
|
||||
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
|
||||
; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
|
||||
; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
|
||||
%gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
|
||||
@@ -178,7 +179,10 @@ define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounw
|
||||
; GFX9-NOT: m0
|
||||
|
||||
; GCN-NOT: add
|
||||
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
|
||||
|
||||
; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1{{$}}
|
||||
; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
|
||||
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
|
||||
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
|
||||
@@ -190,8 +194,12 @@ define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %o
|
||||
; GFX9-NOT: m0
|
||||
|
||||
; GCN-NOT: add
|
||||
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
|
||||
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
|
||||
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
|
||||
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
|
||||
|
||||
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224{{$}}
|
||||
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240{{$}}
|
||||
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
|
||||
%gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
|
||||
@@ -204,8 +212,12 @@ define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounw
|
||||
; GFX9-NOT: m0
|
||||
|
||||
; GCN-NOT: add
|
||||
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
|
||||
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
|
||||
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
|
||||
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
|
||||
|
||||
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
|
||||
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
|
||||
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
|
||||
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,VI %s
|
||||
|
||||
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
; GCN-LABEL: {{^}}no_reorder_v2f64_global_load_store:
|
||||
; GCN: buffer_load_dwordx4
|
||||
; GCN: buffer_load_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
|
||||
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
|
||||
%tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
|
||||
@@ -15,10 +15,14 @@ define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrs
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
|
||||
; GCN-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
|
||||
; SI: ds_read2_b64
|
||||
; SI: ds_write2_b64
|
||||
; SI: s_endpgm
|
||||
|
||||
; VI: ds_read_b128
|
||||
; VI: ds_write_b128
|
||||
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
|
||||
%tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
|
||||
%tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
|
||||
@@ -27,18 +31,18 @@ define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x dou
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; SI: buffer_load_dwordx4
|
||||
; GCN-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
|
||||
; GCN: buffer_load_dwordx4
|
||||
; GCN: buffer_load_dwordx4
|
||||
; GCN: buffer_load_dwordx4
|
||||
; GCN: buffer_load_dwordx4
|
||||
|
||||
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: buffer_store_dwordx4
|
||||
; SI: s_endpgm
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dwordx4
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
|
||||
%tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
|
||||
%tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
|
||||
@@ -47,13 +51,13 @@ define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> ad
|
||||
ret void
|
||||
}
|
||||
|
||||
; SI-LABEL: {{^}}no_reorder_extload_64:
|
||||
; SI: ds_read_b64
|
||||
; SI: ds_read_b64
|
||||
; SI: ds_write_b64
|
||||
; SI-NOT: ds_read
|
||||
; SI: ds_write_b64
|
||||
; SI: s_endpgm
|
||||
; GCN-LABEL: {{^}}no_reorder_extload_64:
|
||||
; GCN: ds_read_b64
|
||||
; GCN: ds_read_b64
|
||||
; GCN: ds_write_b64
|
||||
; GCN-NOT: ds_read
|
||||
; GCN: ds_write_b64
|
||||
; GCN: s_endpgm
|
||||
define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
|
||||
%tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
|
||||
%tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt,-enable-ds128 < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,VI,FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
|
||||
@@ -156,7 +156,9 @@ entry:
|
||||
; CM: LDS_WRITE
|
||||
; CM: LDS_WRITE
|
||||
|
||||
; GCN: ds_write2_b64
|
||||
; SI: ds_write2_b32
|
||||
; VI: ds_write_b128
|
||||
; GFX9: ds_write_b128
|
||||
define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
|
||||
entry:
|
||||
store <4 x i32> %in, <4 x i32> addrspace(3)* %out
|
||||
|
||||
@@ -46,8 +46,14 @@ define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)*
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}local_store_v3i64:
|
||||
; GCN: ds_write2_b64
|
||||
; GCN: ds_write_b64
|
||||
; SI: ds_write2_b64
|
||||
; SI: ds_write_b64
|
||||
|
||||
; CI: ds_write_b64
|
||||
; CI: ds_write_b128
|
||||
|
||||
; VI: ds_write_b64
|
||||
; VI: ds_write_b128
|
||||
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
|
||||
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
|
||||
ret void
|
||||
|
||||
Reference in New Issue
Block a user