[MLIR][NVVM] Update mbarrier.arrive.expect_tx Op (#169922)
This patch updates the `mbarrier.arrive.expect_tx` Op and adds an Op for its `arrive_drop` variant.

* No change in the existing inline-asm lowering; that functionality continues to work as-is.
* An optional return value is added for the shared_cta space.
* The scope and the relaxed semantics are added as attributes.
* Inline-PTX lowering is used when `predicate` is provided; otherwise, the Op lowers to intrinsics.
* lit tests are added to verify the lowering to intrinsics.
* Specific negative tests are added to check the invalid cases for inline-PTX lowering.

Signed-off-by: Durgadoss R <durgadossr@nvidia.com>
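A quick sketch of the two lowering paths described above (the op forms mirror the lit tests added below; the SSA names %barrier, %txcount, %pred are placeholders):

// Without a predicate, the op lowers to an NVVM intrinsic and may
// return the i64 arrival state (shared_cta space only).
%state = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32 -> i64

// With a predicate, the op keeps the pre-existing inline-PTX lowering
// and cannot produce a result.
nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr<3>, i32, i1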
@@ -889,10 +889,7 @@ def NVVM_MBarrierArriveDropNocompleteOp : NVVM_Op<"mbarrier.arrive_drop.nocomple
   }];
 }

-def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx">,
-  Arguments<(ins
-    AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared]>:$addr,
-    I32:$txcount, PtxPredicate:$predicate)> {
+def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_tx"> {
   let summary = "MBarrier Arrive with Expected Transaction Count";
   let description = [{
     The `nvvm.mbarrier.arrive.expect_tx` operation performs an expect-tx operation
@@ -903,11 +900,11 @@ def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_t
     threads within the CTA. When other threads perform corresponding acquire operations
     (like 'mbarrier.test.wait'), they synchronize with this release pattern.

-    This operation first performs an expect-tx operation with the specified transaction
-    count, then performs an arrive-on operation with an implicit count of 1. The
-    expect-tx operation increases the tx-count of the *mbarrier object* by the specified
-    expectCount value, setting the current phase to expect and tracks the completion
-    of additional asynchronous transactions.
+    This operation first performs an expect-tx operation with the specified transaction
+    count, then performs an arrive-on operation with an implicit count of 1. The
+    expect-tx operation increases the expect-count of the *mbarrier object* by the
+    specified value (i.e. `txcount`), setting the current phase to expect and track
+    the completion of additional asynchronous transactions.

     The operation takes the following operands:
     - `addr`: A pointer to the memory location of the *mbarrier object*. Uses generic
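To make the expect-tx plus arrive pairing concrete, a minimal sketch assuming a 4096-byte asynchronous copy is in flight (%mbar is a placeholder for a shared-memory mbarrier pointer):

// Raise the tx-count of the mbarrier by 4096 and arrive once; the
// current phase completes only after all arrivals land and 4096 bytes
// of asynchronous transactions are accounted for.
%c4096 = llvm.mlir.constant(4096 : i32) : i32
nvvm.mbarrier.arrive.expect_tx %mbar, %c4096 : !llvm.ptr<3>, i32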
@@ -915,11 +912,86 @@ def NVVM_MBarrierArriveExpectTxOp : NVVM_PTXBuilder_Op<"mbarrier.arrive.expect_t
     - `txcount`: An unsigned integer specifying the expected transaction count
       for the expect-tx operation. This represents the number of asynchronous transactions
       expected to complete before the barrier phase completes.
-    - `predicate`: Optional predicate for conditional execution.
+    - `scope`: This specifies the set of threads that directly observe the memory
+      synchronizing effect of the `mbarrier.test.wait` operation.
+    - `relaxed`: When set to true, the `arrive` operation has relaxed memory semantics
+      and does not provide any ordering or visibility guarantees.
+    - `predicate`: Optional predicate for conditional execution used only when lowering to
+      inline-ptx.

     [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive)
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
   }];

-  let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands)";
+  let results = (outs Optional<I64>:$res);
+  let arguments = (ins
+    AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared, LLVM_PointerSharedCluster]>:$addr,
+    I32:$txcount,
+    DefaultValuedAttr<MemScopeKindAttr, "MemScopeKind::CTA">:$scope,
+    DefaultValuedAttr<BoolAttr, "false">:$relaxed,
+    PtxPredicate:$predicate);
+
+  let assemblyFormat = "$addr `,` $txcount (`,` `predicate` `=` $predicate^)? attr-dict `:` type(operands) (`->` type($res)^)?";
+  let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    bool hasIntrinsic() { return !getPredicate(); }
+
+    bool getAsmValues(RewriterBase &rewriter,
+        llvm::SmallVectorImpl<std::pair<mlir::Value, mlir::NVVM::PTXRegisterMod>> &asmValues);
+
+    static mlir::NVVM::IDArgPair
+    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+                          llvm::IRBuilderBase& builder);
+  }];
+
+  string llvmBuilder = [{
+    auto [id, args] = NVVM::MBarrierArriveExpectTxOp::getIntrinsicIDAndArgs(
+        *op, moduleTranslation, builder);
+
+    int addrSpace = llvm::cast<LLVMPointerType>(op.getAddr().getType()).getAddressSpace();
+    if (addrSpace != NVVM::NVVMMemorySpace::SharedCluster)
+      $res = createIntrinsicCall(builder, id, args);
+    else
+      createIntrinsicCall(builder, id, args);
+  }];
+}
+
+def NVVM_MBarrierArriveDropExpectTxOp : NVVM_Op<"mbarrier.arrive_drop.expect_tx"> {
+  let summary = "MBarrier arrive_drop with expected transaction count";
+  let description = [{
+    The `nvvm.mbarrier.arrive_drop.expect_tx` operation is similar to the
+    `nvvm.mbarrier.arrive.expect_tx` operation except that it performs an
+    `arrive_drop` operation instead of only an `arrive` operation.
+
+    [For more information, see PTX ISA](https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-mbarrier-arrive-drop)
+  }];
+
+  let results = (outs Optional<I64>:$res);
+  let arguments = (ins
+    AnyTypeOf<[LLVM_PointerGeneric, LLVM_PointerShared, LLVM_PointerSharedCluster]>:$addr,
+    I32:$txcount,
+    DefaultValuedAttr<MemScopeKindAttr, "MemScopeKind::CTA">:$scope,
+    DefaultValuedAttr<BoolAttr, "false">:$relaxed);
+
+  let assemblyFormat = "$addr `,` $txcount attr-dict `:` type(operands) (`->` type($res)^)?";
+  let hasVerifier = 1;
+
+  let extraClassDeclaration = [{
+    static mlir::NVVM::IDArgPair
+    getIntrinsicIDAndArgs(Operation &op, LLVM::ModuleTranslation &mt,
+                          llvm::IRBuilderBase& builder);
+  }];
+
+  string llvmBuilder = [{
+    auto [id, args] = NVVM::MBarrierArriveDropExpectTxOp::getIntrinsicIDAndArgs(
+        *op, moduleTranslation, builder);
+
+    int addrSpace = llvm::cast<LLVMPointerType>(op.getAddr().getType()).getAddressSpace();
+    if (addrSpace != NVVM::NVVMMemorySpace::SharedCluster)
+      $res = createIntrinsicCall(builder, id, args);
+    else
+      createIntrinsicCall(builder, id, args);
+  }];
 }

 def NVVM_MBarrierTryWaitParityOp : NVVM_PTXBuilder_Op<"mbarrier.try_wait.parity">,
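Reading the new assemblyFormat: scope and relaxed travel in the attr-dict, the predicate keeps its keyword form, and the i64 result is optional. Hedged examples of accepted forms, mirroring the lit tests added later in this patch (%mbar, %mbar_cl, %tx are placeholders):

// Result requested, cluster scope passed via attr-dict:
%r = nvvm.mbarrier.arrive.expect_tx %mbar, %tx {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i64

// shared_cluster space: the underlying intrinsic returns void, so no result:
nvvm.mbarrier.arrive.expect_tx %mbar_cl, %tx : !llvm.ptr<7>, i32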
@@ -923,7 +923,11 @@ struct NVGPUMBarrierArriveExpectTxLowering
         adaptor.getMbarId(), rewriter);
     Value txcount = truncToI32(b, adaptor.getTxcount());
     rewriter.replaceOpWithNewOp<NVVM::MBarrierArriveExpectTxOp>(
-        op, barrier, txcount, adaptor.getPredicate());
+        op, Type{},              // return-value is optional and is void by default
+        barrier, txcount,        // barrier and txcount
+        NVVM::MemScopeKind::CTA, // default scope is CTA
+        false,                   // relaxed-semantics is false
+        adaptor.getPredicate());
     return success();
   }
 };
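Spelled out at the NVVM level, the conversion above pins the new parameters to their pre-patch defaults. A sketch of the op it builds, with the defaulted attributes written explicitly (placeholder SSA names; default-valued attributes may be elided in the printed form):

nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred
    {scope = #nvvm.mem_scope<cta>, relaxed = false} : !llvm.ptr<3>, i32, i1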
@@ -274,6 +274,34 @@ LogicalResult MBarrierArriveDropOp::verify() {
                                     getRes());
 }

+LogicalResult MBarrierArriveExpectTxOp::verify() {
+  // The inline-ptx version of this Op does not support all features.
+  // With predicate, this Op lowers to inline-ptx. So, verify and
+  // error-out if there are unsupported features.
+  if (getPredicate()) {
+    if (getScope() != NVVM::MemScopeKind::CTA)
+      return emitError("mbarrier scope must be CTA when using predicate");
+
+    if (isPtrInSharedClusterSpace(getAddr()))
+      return emitError("mbarrier in shared_cluster space is not supported when "
+                       "using predicate");
+
+    if (getRes())
+      return emitError("return-value is not supported when using predicate");
+
+    if (getRelaxed() == true)
+      return emitError("mbarrier with relaxed semantics is not supported when "
+                       "using predicate");
+  }
+  return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope(),
+                                    getRes());
+}
+
+LogicalResult MBarrierArriveDropExpectTxOp::verify() {
+  return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope(),
+                                    getRes());
+}
+
 LogicalResult MBarrierExpectTxOp::verify() {
   return verifyMBarrierArriveLikeOp(getOperation(), getAddr(), getScope());
 }
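The checks above leave exactly one configuration open to the predicate (inline-PTX) path: CTA scope, generic or shared_cta address space, no result, and relaxed = false. A minimal form that passes the verifier (placeholder names):

// Defaults (scope = cta, relaxed = false), no result, mbarrier in
// shared (addrspace 3) memory: the only shape the asm path accepts.
nvvm.mbarrier.arrive.expect_tx %mbar, %tx, predicate = %p : !llvm.ptr<3>, i32, i1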
@@ -2576,6 +2604,87 @@ mlir::NVVM::IDArgPair MBarrierArriveDropOp::getIntrinsicIDAndArgs(
   return {id, {mbar, count}};
 }

+bool MBarrierArriveExpectTxOp::getAsmValues(
+    RewriterBase &rewriter,
+    llvm::SmallVectorImpl<std::pair<mlir::Value, mlir::NVVM::PTXRegisterMod>>
+        &asmValues) {
+  // Add all the operands but not the attrs to the asmValues list.
+  // The attrs here are used to generate the right variants for
+  // intrinsics-lowering. So, we ignore them while generating inline-PTX.
+  for (auto val : getOperands())
+    asmValues.push_back({val, mlir::NVVM::PTXRegisterMod::Read});
+
+  return false;
+}
+
+mlir::NVVM::IDArgPair MBarrierArriveExpectTxOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::MBarrierArriveExpectTxOp>(op);
+
+  bool isClusterSpace = isPtrInSharedClusterSpace(thisOp.getAddr());
+  bool isClusterScope = thisOp.getScope() == NVVM::MemScopeKind::CLUSTER;
+  // bit-0: Space
+  // bit-1: Scope
+  size_t index = ((isClusterScope ? 1 : 0) << 1) | (isClusterSpace ? 1 : 0);
+
+  // clang-format off
+  static constexpr llvm::Intrinsic::ID IDs[] = {
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_scope_cta_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_scope_cta_space_cluster,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_scope_cluster_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_scope_cluster_space_cluster};
+  static constexpr llvm::Intrinsic::ID relaxedIDs[] = {
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_relaxed_scope_cta_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_relaxed_scope_cta_space_cluster,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_relaxed_scope_cluster_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_expect_tx_relaxed_scope_cluster_space_cluster};
+  // clang-format on
+  auto id = thisOp.getRelaxed() ? relaxedIDs[index] : IDs[index];
+
+  // Tidy-up the Intrinsic Args
+  llvm::Value *txcount = mt.lookupValue(thisOp.getTxcount());
+  llvm::Value *mbar = mt.lookupValue(thisOp.getAddr());
+  bool needCast = isPtrInGenericSpace(thisOp.getAddr());
+  if (needCast)
+    mbar = castPtrToAddrSpace(builder, mbar, NVVMMemorySpace::Shared);
+
+  return {id, {mbar, txcount}};
+}
+
+mlir::NVVM::IDArgPair MBarrierArriveDropExpectTxOp::getIntrinsicIDAndArgs(
+    Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
+  auto thisOp = cast<NVVM::MBarrierArriveDropExpectTxOp>(op);
+
+  bool isClusterSpace = isPtrInSharedClusterSpace(thisOp.getAddr());
+  bool isClusterScope = thisOp.getScope() == NVVM::MemScopeKind::CLUSTER;
+  // bit-0: Space
+  // bit-1: Scope
+  size_t index = ((isClusterScope ? 1 : 0) << 1) | (isClusterSpace ? 1 : 0);
+
+  // clang-format off
+  static constexpr llvm::Intrinsic::ID IDs[] = {
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_scope_cta_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_scope_cta_space_cluster,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_scope_cluster_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_scope_cluster_space_cluster};
+  static constexpr llvm::Intrinsic::ID relaxedIDs[] = {
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_relaxed_scope_cta_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_relaxed_scope_cta_space_cluster,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_relaxed_scope_cluster_space_cta,
+    llvm::Intrinsic::nvvm_mbarrier_arrive_drop_expect_tx_relaxed_scope_cluster_space_cluster};
+  // clang-format on
+  auto id = thisOp.getRelaxed() ? relaxedIDs[index] : IDs[index];
+
+  // Tidy-up the Intrinsic Args
+  llvm::Value *txcount = mt.lookupValue(thisOp.getTxcount());
+  llvm::Value *mbar = mt.lookupValue(thisOp.getAddr());
+  bool needCast = isPtrInGenericSpace(thisOp.getAddr());
+  if (needCast)
+    mbar = castPtrToAddrSpace(builder, mbar, NVVMMemorySpace::Shared);
+
+  return {id, {mbar, txcount}};
+}
+
 mlir::NVVM::IDArgPair MBarrierArriveNocompleteOp::getIntrinsicIDAndArgs(
     Operation &op, LLVM::ModuleTranslation &mt, llvm::IRBuilderBase &builder) {
   auto thisOp = cast<NVVM::MBarrierArriveNocompleteOp>(op);
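The 2-bit index above selects one of four space/scope intrinsic variants, and relaxed = true swaps in the .relaxed twin of the same variant. For example, per the lit tests below (placeholder names):

// scope = cluster, space = shared_cta -> index 0b10, lowering to
// @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta:
%r = nvvm.mbarrier.arrive.expect_tx %mbar, %tx {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i64

// relaxed = true -> @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta:
%s = nvvm.mbarrier.arrive.expect_tx %mbar, %tx {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<3>, i32 -> i64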
@@ -16,8 +16,6 @@ llvm.func @init_mbarrier(%barrier_gen : !llvm.ptr, %barrier : !llvm.ptr<3>, %cou

 // CHECK-LABEL: @init_mbarrier_arrive_expect_tx
 llvm.func @init_mbarrier_arrive_expect_tx(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) {
-  //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r"
-  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32
   //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.arrive.expect_tx.shared.b64 _, [$0], $1;", "r,r,b"
   nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr<3>, i32, i1
   llvm.return
@@ -25,8 +23,6 @@ llvm.func @init_mbarrier_arrive_expect_tx(%barrier : !llvm.ptr<3>, %txcount : i3

 // CHECK-LABEL: @init_mbarrier_arrive_expect_tx_generic
 llvm.func @init_mbarrier_arrive_expect_tx_generic(%barrier : !llvm.ptr, %txcount : i32, %pred : i1) {
-  // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "mbarrier.arrive.expect_tx.b64 _, [$0], $1;", "l,r"
-  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr, i32
   // CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$2 mbarrier.arrive.expect_tx.b64 _, [$0], $1;", "l,r,b"
   nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr, i32, i1
   llvm.return
@@ -544,8 +540,8 @@ func.func @elect_one_leader_sync() {

 // -----

-// CHECK-LABEL: @init_mbarrier_arrive_expect_tx
-llvm.func @init_mbarrier_arrive_expect_tx(%desc : !llvm.ptr, %pred : i1) {
+// CHECK-LABEL: @test_nvvm_prefetch
+llvm.func @test_nvvm_prefetch(%desc : !llvm.ptr, %pred : i1) {
   //CHECK: nvvm.prefetch tensormap, %{{.*}}
   nvvm.prefetch tensormap, %desc : !llvm.ptr
   //CHECK: llvm.inline_asm has_side_effects asm_dialect = att "@$1 prefetch.tensormap [$0];", "l,b"
new file: mlir/test/Target/LLVMIR/nvvm/mbar_arr_drop_expect_tx.mlir (68 lines)
@@ -0,0 +1,68 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

llvm.func @mbarrier_arrive_drop_expect_tx_generic(%barrier: !llvm.ptr, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_drop_expect_tx_generic(ptr %0, i32 %1) {
  // CHECK-NEXT: %3 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %3, i32 %1)
  // CHECK-NEXT: %5 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %5, i32 %1)
  // CHECK-NEXT: %7 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %7, i32 %1)
  // CHECK-NEXT: %9 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %10 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %9, i32 %1)
  // CHECK-NEXT: %11 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %12 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %11, i32 %1)
  // CHECK-NEXT: %13 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %14 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %13, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  %0 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr, i32 -> i64
  %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr, i32 -> i64
  %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32 -> i64

  %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr, i32 -> i64
  %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr, i32 -> i64
  %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr, i32 -> i64
  llvm.return
}

llvm.func @mbarrier_arrive_drop_expect_tx_shared(%barrier: !llvm.ptr<3>, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_drop_expect_tx_shared(ptr addrspace(3) %0, i32 %1) {
  // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %5 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %7 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  %0 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32 -> i64
  %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<3>, i32 -> i64
  %2 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i64

  %3 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<3>, i32 -> i64
  %4 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr<3>, i32 -> i64
  %5 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<3>, i32 -> i64
  llvm.return
}

llvm.func @mbarrier_arrive_drop_expect_tx_shared_cluster(%barrier: !llvm.ptr<7>, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_drop_expect_tx_shared_cluster(ptr addrspace(7) %0, i32 %1) {
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.drop.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<7>, i32

  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive_drop.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<7>, i32
  llvm.return
}
new file: mlir/test/Target/LLVMIR/nvvm/mbar_arr_expect_tx.mlir (68 lines)
@@ -0,0 +1,68 @@
// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s

llvm.func @mbarrier_arrive_expect_tx_generic(%barrier: !llvm.ptr, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_expect_tx_generic(ptr %0, i32 %1) {
  // CHECK-NEXT: %3 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %3, i32 %1)
  // CHECK-NEXT: %5 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %5, i32 %1)
  // CHECK-NEXT: %7 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %7, i32 %1)
  // CHECK-NEXT: %9 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %10 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %9, i32 %1)
  // CHECK-NEXT: %11 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %12 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %11, i32 %1)
  // CHECK-NEXT: %13 = addrspacecast ptr %0 to ptr addrspace(3)
  // CHECK-NEXT: %14 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %13, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  %0 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr, i32 -> i64
  %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr, i32 -> i64
  %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr, i32 -> i64

  %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr, i32 -> i64
  %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr, i32 -> i64
  %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr, i32 -> i64
  llvm.return
}

llvm.func @mbarrier_arrive_expect_tx_shared(%barrier: !llvm.ptr<3>, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_expect_tx_shared(ptr addrspace(3) %0, i32 %1) {
  // CHECK-NEXT: %3 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %4 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %5 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %6 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %7 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: %8 = call i64 @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cta(ptr addrspace(3) %0, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  %0 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<3>, i32 -> i64
  %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<3>, i32 -> i64
  %2 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32 -> i64

  %3 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<3>, i32 -> i64
  %4 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr<3>, i32 -> i64
  %5 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<3>, i32 -> i64
  llvm.return
}

llvm.func @mbarrier_arrive_expect_tx_shared_cluster(%barrier: !llvm.ptr<7>, %txcount : i32) {
  // CHECK-LABEL: define void @mbarrier_arrive_expect_tx_shared_cluster(ptr addrspace(7) %0, i32 %1) {
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.scope.cluster.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cta.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: call void @llvm.nvvm.mbarrier.arrive.expect.tx.relaxed.scope.cluster.space.cluster(ptr addrspace(7) %0, i32 %1)
  // CHECK-NEXT: ret void
  // CHECK-NEXT: }
  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<7>, i32

  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {relaxed = true} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cta>, relaxed = true} : !llvm.ptr<7>, i32
  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount {scope = #nvvm.mem_scope<cluster>, relaxed = true} : !llvm.ptr<7>, i32
  llvm.return
}
@@ -47,3 +47,68 @@ llvm.func @mbarrier_complete_tx_scope(%barrier: !llvm.ptr<3>, %tx_count: i32) {
   nvvm.mbarrier.complete_tx %barrier, %tx_count {scope = #nvvm.mem_scope<sys>} : !llvm.ptr<3>, i32
   llvm.return
 }
+
+// -----
+
+llvm.func @mbarrier_arr_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) {
+  // expected-error @below {{mbarrier scope must be either CTA or Cluster}}
+  %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope<gpu>} : !llvm.ptr<3>, i32 -> i64
+  llvm.return
+}
+
+// -----
+
+llvm.func @mbarrier_arr_expect_tx_cluster(%barrier: !llvm.ptr<7>, %tx_count: i32) {
+  // expected-error @below {{mbarrier in shared_cluster space cannot return any value}}
+  %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<7>, i32 -> i64
+  llvm.return
+}
+
+// -----
+
+llvm.func @init_mbarrier_arrive_expect_tx_asm_ret(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) {
+  // expected-error @below {{return-value is not supported when using predicate}}
+  %1 = nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr<3>, i32, i1 -> i64
+  llvm.return
+}
+
+// -----
+
+llvm.func @init_mbarrier_arrive_expect_tx_asm_relaxed(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) {
+  // expected-error @below {{mbarrier with relaxed semantics is not supported when using predicate}}
+  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred {relaxed = true} : !llvm.ptr<3>, i32, i1
+  llvm.return
+}
+
+// -----
+
+llvm.func @init_mbarrier_arrive_expect_tx_asm_cta(%barrier : !llvm.ptr<3>, %txcount : i32, %pred : i1) {
+  // expected-error @below {{mbarrier scope must be CTA when using predicate}}
+  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred {scope = #nvvm.mem_scope<cluster>} : !llvm.ptr<3>, i32, i1
+  llvm.return
+}
+
+// -----
+
+llvm.func @init_mbarrier_arrive_expect_tx_asm_cluster(%barrier : !llvm.ptr<7>, %txcount : i32, %pred : i1) {
+  // expected-error @below {{mbarrier in shared_cluster space is not supported when using predicate}}
+  nvvm.mbarrier.arrive.expect_tx %barrier, %txcount, predicate = %pred : !llvm.ptr<7>, i32, i1
+  llvm.return
+}
+
+// -----
+
+llvm.func @mbarrier_arr_drop_expect_tx(%barrier: !llvm.ptr<3>, %tx_count: i32) {
+  // expected-error @below {{mbarrier scope must be either CTA or Cluster}}
+  %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope<gpu>} : !llvm.ptr<3>, i32 -> i64
+  llvm.return
+}
+
+// -----
+
+llvm.func @mbarrier_arr_drop_expect_tx_cluster(%barrier: !llvm.ptr<7>, %tx_count: i32) {
+  // expected-error @below {{mbarrier in shared_cluster space cannot return any value}}
+  %1 = nvvm.mbarrier.arrive_drop.expect_tx %barrier, %tx_count {scope = #nvvm.mem_scope<cta>} : !llvm.ptr<7>, i32 -> i64
+  llvm.return
+}