mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[MLIR][Python] Add tests for nvvm barrier ops (#167976)
Found this issue #167958 when adding these tests, thanks for the quick fix @clementval.
This commit is contained in:
@@ -15,7 +15,9 @@ def constructAndPrintInModule(f):
|
||||
module = Module.create()
|
||||
with InsertionPoint(module.body):
|
||||
f()
|
||||
|
||||
print(module)
|
||||
module.operation.verify()
|
||||
return f
|
||||
|
||||
|
||||
@@ -89,3 +91,133 @@ def test_inline_ptx():
|
||||
arith.addf(a, b)
|
||||
arith.addi(c, d)
|
||||
arith.addf(wo0, wo1)
|
||||
|
||||
|
||||
@constructAndPrintInModule
def test_barriers():
    """Build a ``barriers`` function that emits the NVVM barrier-style ops.

    The function takes ``(i32, i32, f32)`` arguments and returns the result
    of the last reducing ``nvvm.barrier``.  The printed module is matched by
    the FileCheck directives that follow this test in the file, so the order
    in which ops are created here must not change.
    """
    i32_ty = T.i32()
    f32_ty = T.f32()
    reduction_ops = (
        nvvm.BarrierReduction.AND,
        nvvm.BarrierReduction.OR,
        nvvm.BarrierReduction.POPC,
    )

    @func.FuncOp.from_py_func(i32_ty, i32_ty, f32_ty)
    def barriers(mask, vi32, vf32):
        barrier_id = arith.constant(T.i32(), 0)
        thread_count = arith.constant(T.i32(), 0xFFFF)

        # First barrier: explicit id and thread count, no reduction.
        chained = nvvm.barrier(
            res=i32_ty,
            barrier_id=barrier_id,
            number_of_threads=thread_count,
        )

        # Each reducing barrier consumes the previous barrier's result as
        # its predicate, forming a chain and -> or -> popc.
        for reduction_op in reduction_ops:
            chained = nvvm.barrier(
                res=i32_ty,
                reduction_op=reduction_op,
                reduction_predicate=chained,
            )

        nvvm.barrier0()
        nvvm.bar_warp_sync(mask)

        # Cluster ops are emitted once without and once with ``aligned``.
        nvvm.cluster_arrive()
        nvvm.cluster_arrive(aligned=True)
        nvvm.cluster_arrive_relaxed()
        nvvm.cluster_arrive_relaxed(aligned=True)
        nvvm.cluster_wait()
        nvvm.cluster_wait(aligned=True)

        nvvm.fence_mbarrier_init()
        nvvm.bar_warp_sync(mask)
        return chained
|
||||
|
||||
|
||||
# CHECK-LABEL: func.func @barriers(
|
||||
# CHECK: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: f32) -> i32 {
|
||||
# CHECK: %[[CONSTANT_0:.*]] = arith.constant 0 : i32
|
||||
# CHECK: %[[CONSTANT_1:.*]] = arith.constant 65535 : i32
|
||||
# CHECK: %[[BARRIER_0:.*]] = nvvm.barrier id = %[[CONSTANT_0]] number_of_threads = %[[CONSTANT_1]] -> i32
|
||||
# CHECK: %[[BARRIER_1:.*]] = nvvm.barrier #nvvm.reduction<and> %[[BARRIER_0]] -> i32
|
||||
# CHECK: %[[BARRIER_2:.*]] = nvvm.barrier #nvvm.reduction<or> %[[BARRIER_1]] -> i32
|
||||
# CHECK: %[[BARRIER_3:.*]] = nvvm.barrier #nvvm.reduction<popc> %[[BARRIER_2]] -> i32
|
||||
# CHECK: nvvm.barrier0
|
||||
# CHECK: nvvm.bar.warp.sync %[[ARG0]] : i32
|
||||
# CHECK: nvvm.cluster.arrive
|
||||
# CHECK: nvvm.cluster.arrive {aligned}
|
||||
# CHECK: nvvm.cluster.arrive.relaxed
|
||||
# CHECK: nvvm.cluster.arrive.relaxed {aligned}
|
||||
# CHECK: nvvm.cluster.wait
|
||||
# CHECK: nvvm.cluster.wait {aligned}
|
||||
# CHECK: nvvm.fence.mbarrier.init
|
||||
# CHECK: nvvm.bar.warp.sync %[[ARG0]] : i32
|
||||
# CHECK: return %[[BARRIER_3]] : i32
|
||||
# CHECK: }
|
||||
|
||||
|
||||
@constructAndPrintInModule
def test_reductions():
    """Build a ``reductions`` function that emits ``nvvm.redux_sync`` ops.

    For every combination of the ``abs`` and ``nan`` flags (``abs`` varying
    slowest, ``True`` before ``False``) the function emits one integer
    reduction per integer kind followed by one float reduction per float
    kind.  Only the float reductions receive the flags.  The printed module
    is matched by the FileCheck directives that follow this test in the
    file, so the emission order must not change.
    """
    i32_ty = T.i32()
    f32_ty = T.f32()
    flag_values = (True, False)
    integer_kinds = (
        nvvm.ReduxKind.AND,
        nvvm.ReduxKind.MAX,
        nvvm.ReduxKind.MIN,
        nvvm.ReduxKind.OR,
        nvvm.ReduxKind.UMAX,
        nvvm.ReduxKind.UMIN,
        nvvm.ReduxKind.XOR,
    )
    float_kinds = (
        nvvm.ReduxKind.FMIN,
        nvvm.ReduxKind.FMAX,
    )

    @func.FuncOp.from_py_func(i32_ty, i32_ty, f32_ty)
    def reductions(mask, vi32, vf32):
        for use_abs in flag_values:
            for use_nan in flag_values:
                # Integer reductions: the second i32 argument serves as both
                # the reduced value and the fourth positional operand.
                for int_kind in integer_kinds:
                    nvvm.redux_sync(i32_ty, vi32, int_kind, vi32)

                # Float reductions additionally carry the abs/nan flags.
                for float_kind in float_kinds:
                    nvvm.redux_sync(
                        f32_ty,
                        vf32,
                        float_kind,
                        vi32,
                        abs=use_abs,
                        nan=use_nan,
                    )
|
||||
|
||||
|
||||
# CHECK-LABEL: func.func @reductions(
|
||||
# CHECK: %[[ARG0:.*]]: i32, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: f32) {
|
||||
# CHECK: %[[REDUX_0:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_1:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_2:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_3:.*]] = nvvm.redux.sync or %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_4:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_5:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_6:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_7:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {abs = true, nan = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_8:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {abs = true, nan = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_9:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_10:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_11:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_12:.*]] = nvvm.redux.sync or %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_13:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_14:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_15:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_16:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {abs = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_17:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {abs = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_18:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_19:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_20:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_21:.*]] = nvvm.redux.sync or %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_22:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_23:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_24:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_25:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] {nan = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_26:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] {nan = true} : f32 -> f32
|
||||
# CHECK: %[[REDUX_27:.*]] = nvvm.redux.sync and %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_28:.*]] = nvvm.redux.sync max %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_29:.*]] = nvvm.redux.sync min %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_30:.*]] = nvvm.redux.sync or %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_31:.*]] = nvvm.redux.sync umax %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_32:.*]] = nvvm.redux.sync umin %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_33:.*]] = nvvm.redux.sync xor %[[ARG1]], %[[ARG1]] : i32 -> i32
|
||||
# CHECK: %[[REDUX_34:.*]] = nvvm.redux.sync fmin %[[ARG2]], %[[ARG1]] : f32 -> f32
|
||||
# CHECK: %[[REDUX_35:.*]] = nvvm.redux.sync fmax %[[ARG2]], %[[ARG1]] : f32 -> f32
|
||||
# CHECK: return
|
||||
# CHECK: }
|
||||
|
||||
Reference in New Issue
Block a user