From 6e30d97e89f1c49e8bf2073746a5e08f5e75948f Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Thu, 18 Apr 2024 13:10:49 +0530 Subject: [PATCH] [MLIR][NVVM] [NFC] Update Docs for shfl.sync Op (#89044) The first argument to the nvvm_shfl_sync_* family of intrinsics is the thread_mask (aka member_mask). This patch renames the corresponding operand in the Op to reflect the same i.e. `dst` -> `thread_mask`. While we are there, add summary and description for this Op. Signed-off-by: Durgadoss R --- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 728e92c9dc8d..f76b6d19b895 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -619,20 +619,33 @@ def ShflKindAttr : EnumAttr; def NVVM_ShflOp : NVVM_Op<"shfl.sync">, Results<(outs LLVM_Type:$res)>, - Arguments<(ins I32:$dst, + Arguments<(ins I32:$thread_mask, LLVM_Type:$val, I32:$offset, I32:$mask_and_clamp, ShflKindAttr:$kind, OptionalAttr:$return_value_and_is_valid)> { + let summary = "NVVM Dialect Op for shfl.sync"; + let description = [{ + The `shfl.sync` Op implements data shuffle within threads of a warp. + The `thread_mask` denotes the threads participating in the Op where + the bit position corresponds to a particular thread’s laneid. + The `offset` specifies a source lane or source lane offset + (depending on `kind`). The `val` is the input value to be copied from + the source. The `mask_and_clamp` contains two packed values specifying + a mask for logically splitting warps into sub-segments and an upper bound + for clamping the source lane index. 
+ [For more information, refer to the PTX ISA] + (https://docs.nvidia.com/cuda/parallel-thread-execution/#data-movement-and-conversion-instructions-shfl-sync) + }]; string llvmBuilder = [{ auto intId = getShflIntrinsicId( $_resultType, $kind, static_cast($return_value_and_is_valid)); $res = createIntrinsicCall(builder, - intId, {$dst, $val, $offset, $mask_and_clamp}); + intId, {$thread_mask, $val, $offset, $mask_and_clamp}); }]; let assemblyFormat = [{ - $kind $dst `,` $val `,` $offset `,` $mask_and_clamp attr-dict + $kind $thread_mask `,` $val `,` $offset `,` $mask_and_clamp attr-dict `:` type($val) `->` type($res) }]; let hasVerifier = 1;