AMDGPU: Use 128-bit DS operations by default

This commit is contained in:
Matt Arsenault
2020-04-01 16:46:10 -04:00
committed by Matt Arsenault
parent 192cccb152
commit f68cc2a7ed
17 changed files with 612 additions and 724 deletions

View File

@@ -78,7 +78,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
// unset everything else if it is disabled
// Assuming ECC is enabled is the conservative default.
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,");
SmallString<256> FullFS("+promote-alloca,+load-store-opt,+enable-ds128,+sram-ecc,+xnack,");
if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA.
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,";

File diff suppressed because it is too large Load Diff

View File

@@ -309,8 +309,8 @@ define amdgpu_kernel void @concat_vector_crash2(<8 x i8> addrspace(1)* %out, i32
; GCN-LABEL: {{^}}build_vector_splat_concat_v8i16:
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
; VI: ds_write_b64
; VI: ds_write2_b64
; VI: ds_write_b128
; VI: ds_write_b128
define amdgpu_kernel void @build_vector_splat_concat_v8i16() {
entry:
store <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* undef, align 16

View File

@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt < %s | FileCheck -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -mattr=+load-store-opt,-enable-ds128 < %s | FileCheck -check-prefixes=CI,NODS128 %s
@lds = addrspace(3) global [512 x float] undef, align 4
@lds.v2 = addrspace(3) global [512 x <2 x float>] undef, align 4

View File

@@ -49,8 +49,8 @@ define amdgpu_kernel void @private_access_f64_alloca(double addrspace(1)* noalia
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write2_b64
; CI-PROMOTE: ds_read2_b64
; CI-PROMOTE: ds_write_b128
; CI-PROMOTE: ds_read_b128
define amdgpu_kernel void @private_access_v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x double>, <2 x double> addrspace(1)* %in, align 16
%array = alloca [4 x <2 x double>], align 16, addrspace(5)
@@ -107,8 +107,8 @@ define amdgpu_kernel void @private_access_i64_alloca(i64 addrspace(1)* noalias %
; SI-PROMOTE: ds_write_b64
; SI-PROMOTE: ds_read_b64
; SI-PROMOTE: ds_read_b64
; CI-PROMOTE: ds_write2_b64
; CI-PROMOTE: ds_read2_b64
; CI-PROMOTE: ds_write_b128
; CI-PROMOTE: ds_read_b128
define amdgpu_kernel void @private_access_v2i64_alloca(<2 x i64> addrspace(1)* noalias %out, <2 x i64> addrspace(1)* noalias %in, i32 %b) #1 {
%val = load <2 x i64>, <2 x i64> addrspace(1)* %in, align 16
%array = alloca [4 x <2 x i64>], align 16, addrspace(5)

View File

@@ -18,9 +18,9 @@ define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %
; GCN-LABEL: store_v5i32:
; GCN: ds_read_b32
; GCN: ds_read2_b64
; GCN: ds_read_b128
; GCN: ds_write_b32
; GCN: ds_write2_b64
; GCN: ds_write_b128
; GCN: ScratchSize: 0
define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %a) nounwind {
%val = load <5 x i32>, <5 x i32> addrspace(3)* %out
@@ -28,5 +28,3 @@ define amdgpu_kernel void @store_v5i32(<5 x i32> addrspace(3)* %out, <5 x i32> %
store <5 x i32> %val.1, <5 x i32> addrspace(3)* %out, align 16
ret void
}

View File

@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read/write_128

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read_b128

View File

@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_b128

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx908 -mattr=-flat-for-global,-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_128

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; Testing for ds_read/write_b128

View File

@@ -1,6 +1,6 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,VI,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-enable-ds128 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; Testing for ds_read/write_b128

View File

@@ -1,7 +1,7 @@
; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,SICIVI,CIPLUS %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SICIVI,CIPLUS %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,CIPLUS %s
; GCN-LABEL: {{^}}local_i32_load
; SICIVI: s_mov_b32 m0
@@ -165,7 +165,8 @@ define amdgpu_kernel void @local_f64_store_0_offset(double addrspace(3)* %out) n
; GFX9-NOT: m0
; GCN-NOT: add
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15
; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:112
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <2 x i64>, <2 x i64> addrspace(3)* %out, i32 7
@@ -178,7 +179,10 @@ define amdgpu_kernel void @local_v2i64_store(<2 x i64> addrspace(3)* %out) nounw
; GFX9-NOT: m0
; GCN-NOT: add
; GCN: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; SI: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1{{$}}
; CIPLUS: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
; GCN: s_endpgm
define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %out) nounwind {
store <2 x i64> <i64 1234, i64 1234>, <2 x i64> addrspace(3)* %out, align 16
@@ -190,8 +194,12 @@ define amdgpu_kernel void @local_v2i64_store_0_offset(<2 x i64> addrspace(3)* %o
; GFX9-NOT: m0
; GCN-NOT: add
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:224{{$}}
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:240{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounwind {
%gep = getelementptr <4 x i64>, <4 x i64> addrspace(3)* %out, i32 7
@@ -204,8 +212,12 @@ define amdgpu_kernel void @local_v4i64_store(<4 x i64> addrspace(3)* %out) nounw
; GFX9-NOT: m0
; GCN-NOT: add
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; SI-DAG: ds_write2_b64 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}} offset1:1
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]$}}
; CIPLUS-DAG: ds_write_b128 v{{[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}} offset:16{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @local_v4i64_store_0_offset(<4 x i64> addrspace(3)* %out) nounwind {
store <4 x i64> <i64 1234, i64 1234, i64 1234, i64 1234>, <4 x i64> addrspace(3)* %out, align 16

View File

@@ -1,12 +1,12 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GCN,VI %s
; SI-LABEL: {{^}}no_reorder_v2f64_global_load_store:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
; GCN-LABEL: {{^}}no_reorder_v2f64_global_load_store:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrspace(1)* nocapture %x, <2 x double> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(1)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(1)* %y, align 16
@@ -15,10 +15,14 @@ define amdgpu_kernel void @no_reorder_v2f64_global_load_store(<2 x double> addrs
ret void
}
; SI-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
; GCN-LABEL: {{^}}no_reorder_scalarized_v2f64_local_load_store:
; SI: ds_read2_b64
; SI: ds_write2_b64
; SI: s_endpgm
; VI: ds_read_b128
; VI: ds_write_b128
; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x double> addrspace(3)* nocapture %x, <2 x double> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x double>, <2 x double> addrspace(3)* %x, align 16
%tmp4 = load <2 x double>, <2 x double> addrspace(3)* %y, align 16
@@ -27,18 +31,18 @@ define amdgpu_kernel void @no_reorder_scalarized_v2f64_local_load_store(<2 x dou
ret void
}
; SI-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; GCN-LABEL: {{^}}no_reorder_split_v8i32_global_load_store:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: buffer_store_dwordx4
; SI: s_endpgm
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> addrspace(1)* nocapture %x, <8 x i32> addrspace(1)* nocapture %y) nounwind {
%tmp1 = load <8 x i32>, <8 x i32> addrspace(1)* %x, align 32
%tmp4 = load <8 x i32>, <8 x i32> addrspace(1)* %y, align 32
@@ -47,13 +51,13 @@ define amdgpu_kernel void @no_reorder_split_v8i32_global_load_store(<8 x i32> ad
ret void
}
; SI-LABEL: {{^}}no_reorder_extload_64:
; SI: ds_read_b64
; SI: ds_read_b64
; SI: ds_write_b64
; SI-NOT: ds_read
; SI: ds_write_b64
; SI: s_endpgm
; GCN-LABEL: {{^}}no_reorder_extload_64:
; GCN: ds_read_b64
; GCN: ds_read_b64
; GCN: ds_write_b64
; GCN-NOT: ds_read
; GCN: ds_write_b64
; GCN: s_endpgm
define amdgpu_kernel void @no_reorder_extload_64(<2 x i32> addrspace(3)* nocapture %x, <2 x i32> addrspace(3)* nocapture %y) nounwind {
%tmp1 = load <2 x i32>, <2 x i32> addrspace(3)* %x, align 8
%tmp4 = load <2 x i32>, <2 x i32> addrspace(3)* %y, align 8

View File

@@ -1,4 +1,4 @@
; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=hawaii -enable-amdgpu-aa=0 -verify-machineinstrs -mattr=-promote-alloca,-load-store-opt,-enable-ds128 < %s | FileCheck -check-prefix=GCN %s
@sPrivateStorage = internal addrspace(3) global [256 x [8 x <4 x i64>]] undef

View File

@@ -1,5 +1,5 @@
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,VI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
; RUN: llc -march=r600 -mtriple=r600-- -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
@@ -156,7 +156,9 @@ entry:
; CM: LDS_WRITE
; CM: LDS_WRITE
; GCN: ds_write2_b64
; SI: ds_write2_b32
; VI: ds_write_b128
; GFX9: ds_write_b128
define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
store <4 x i32> %in, <4 x i32> addrspace(3)* %out

View File

@@ -46,8 +46,14 @@ define amdgpu_kernel void @global_store_v3i64_unaligned(<3 x i64> addrspace(1)*
}
; GCN-LABEL: {{^}}local_store_v3i64:
; GCN: ds_write2_b64
; GCN: ds_write_b64
; SI: ds_write2_b64
; SI: ds_write_b64
; CI: ds_write_b64
; CI: ds_write_b128
; VI: ds_write_b64
; VI: ds_write_b128
define amdgpu_kernel void @local_store_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> %x) {
store <3 x i64> %x, <3 x i64> addrspace(3)* %out, align 32
ret void