[mlir][NVVM] Make sure barrier reduction attr can roundtrip (#167958)

The IR could not be round-tripped through mlir-opt. Update the assembly
format to print the reduction attribute in its qualified form
(e.g. `#nvvm.reduction<and>`) and add round-trip tests.

```
mlir-opt mlir/test/Target/LLVMIR/nvvm/barrier.mlir | mlir-opt
<stdin>:6:5: error: cannot name an operation with no results
    %0 = nvvm.barrier <and> %arg2 -> i32
```
This commit is contained in:
Valentin Clement (バレンタイン クレメン)
2025-11-13 14:02:08 -08:00
committed by GitHub
parent 6a89439423
commit ebc35f8b12
3 changed files with 26 additions and 19 deletions

View File

@@ -103,24 +103,24 @@ end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: nvvm.barrier0
! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: %{{.*}} = nvvm.barrier <and> %c1{{.*}} -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<and> %c1{{.*}} -> i32
! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32
! CHECK: %{{.*}} = nvvm.barrier <and> %[[CONV]] -> i32
! CHECK: %{{.*}} = nvvm.barrier <popc> %c1{{.*}} -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<and> %[[CONV]] -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<popc> %c1{{.*}} -> i32
! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32
! CHECK: %{{.*}} = nvvm.barrier <popc> %[[CONV]] -> i32
! CHECK: %{{.*}} = nvvm.barrier <or> %c1{{.*}} -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<popc> %[[CONV]] -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<or> %c1{{.*}} -> i32
! CHECK: %[[A:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[B:.*]] = fir.load %{{.*}} : !fir.ref<i32>
! CHECK: %[[CMP:.*]] = arith.cmpi sgt, %[[A]], %[[B]] : i32
! CHECK: %[[CONV:.*]] = fir.convert %[[CMP]] : (i1) -> i32
! CHECK: %{{.*}} = nvvm.barrier <or> %[[CONV]] -> i32
! CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<or> %[[CONV]] -> i32
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
@@ -214,9 +214,9 @@ end
! CHECK: cuf.kernel
! CHECK: nvvm.barrier0
! CHECK: nvvm.bar.warp.sync %c1{{.*}} : i32
! CHECK: nvvm.barrier <and> %c1{{.*}} -> i32
! CHECK: nvvm.barrier <popc> %c1{{.*}} -> i32
! CHECK: nvvm.barrier <or> %c1{{.*}} -> i32
! CHECK: nvvm.barrier #nvvm.reduction<and> %c1{{.*}} -> i32
! CHECK: nvvm.barrier #nvvm.reduction<popc> %c1{{.*}} -> i32
! CHECK: nvvm.barrier #nvvm.reduction<or> %c1{{.*}} -> i32
attributes(device) subroutine testMatch()
integer :: a, ipred, mask, v32

View File

@@ -994,7 +994,7 @@ def NVVM_BarrierOp : NVVM_Op<"barrier", [AttrSizedOperandSegments]> {
let assemblyFormat =
"(`id` `=` $barrierId^)? (`number_of_threads` `=` $numberOfThreads^)? "
"($reductionOp^ $reductionPredicate)? (`->` type($res)^)? attr-dict";
"(qualified($reductionOp)^ $reductionPredicate)? (`->` type($res)^)? attr-dict";
let builders = [OpBuilder<(ins), [{
return build($_builder, $_state, TypeRange{}, Value{}, Value{}, {}, Value{});

View File

@@ -1,19 +1,26 @@
// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s
// RUN: mlir-translate -mlir-to-llvmir %s -split-input-file --verify-diagnostics | FileCheck %s --check-prefix=LLVM
// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s
// CHECK-LABEL: @llvm_nvvm_barrier(
// CHECK-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]])
// LLVM-LABEL: @llvm_nvvm_barrier(
// LLVM-SAME: i32 %[[barId:.*]], i32 %[[numThreads:.*]], i32 %[[redOperand:.*]])
llvm.func @llvm_nvvm_barrier(%barID : i32, %numberOfThreads : i32, %redOperand : i32) {
// CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
// LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 0)
// CHECK: nvvm.barrier
nvvm.barrier
// CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
// LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.all(i32 %[[barId]])
// CHECK: nvvm.barrier id = %{{.*}}
nvvm.barrier id = %barID
// CHECK: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
// LLVM: call void @llvm.nvvm.barrier.cta.sync.aligned.count(i32 %[[barId]], i32 %[[numThreads]])
// CHECK: nvvm.barrier id = %{{.*}} number_of_threads = %{{.*}}
nvvm.barrier id = %barID number_of_threads = %numberOfThreads
// CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]])
// LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.and(i32 %[[redOperand]])
// CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<and> %{{.*}} -> i32
%0 = nvvm.barrier #nvvm.reduction<and> %redOperand -> i32
// CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]])
// LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.or(i32 %[[redOperand]])
// CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<or> %{{.*}} -> i32
%1 = nvvm.barrier #nvvm.reduction<or> %redOperand -> i32
// CHECK: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]])
// LLVM: %{{.*}} = call i32 @llvm.nvvm.barrier0.popc(i32 %[[redOperand]])
// CHECK: %{{.*}} = nvvm.barrier #nvvm.reduction<popc> %{{.*}} -> i32
%2 = nvvm.barrier #nvvm.reduction<popc> %redOperand -> i32
llvm.return