mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[flang][cuda] Use PTX instruction for atomicAdd with 4xf32 (#169581)
Implementation similar to the clang one in `clang/lib/Headers/__clang_cuda_intrinsics.h`
This commit is contained in:
committed by
GitHub
parent
fd22706e93
commit
f7a9fcad99
@@ -29,6 +29,8 @@ struct CUDAIntrinsicLibrary : IntrinsicLibrary {
|
||||
template <int extent>
|
||||
fir::ExtendedValue genAtomicAddVector(mlir::Type,
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
fir::ExtendedValue genAtomicAddVector4x4(mlir::Type,
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
|
||||
fir::ExtendedValue genAtomicCas(mlir::Type,
|
||||
llvm::ArrayRef<fir::ExtendedValue>);
|
||||
|
||||
@@ -195,7 +195,7 @@ static constexpr IntrinsicHandler cudaHandlers[]{
|
||||
false},
|
||||
{"atomicadd_r4x4",
|
||||
static_cast<CUDAIntrinsicLibrary::ExtendedGenerator>(
|
||||
&CI::genAtomicAddVector<4>),
|
||||
&CI::genAtomicAddVector4x4),
|
||||
{{{"a", asAddr}, {"v", asAddr}}},
|
||||
false},
|
||||
{"atomicaddd",
|
||||
@@ -758,6 +758,56 @@ fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector(
|
||||
return fir::ArrayBoxValue(res, {ext});
|
||||
}
|
||||
|
||||
// ATOMICADDVECTOR4x4
|
||||
fir::ExtendedValue CUDAIntrinsicLibrary::genAtomicAddVector4x4(
|
||||
mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) {
|
||||
assert(args.size() == 2);
|
||||
mlir::Value a = fir::getBase(args[0]);
|
||||
if (mlir::isa<fir::BaseBoxType>(a.getType()))
|
||||
a = fir::BoxAddrOp::create(builder, loc, a);
|
||||
|
||||
const unsigned extent = 4;
|
||||
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
|
||||
mlir::Value ptr = builder.createConvert(loc, llvmPtrTy, a);
|
||||
mlir::Type f32Ty = builder.getF32Type();
|
||||
mlir::Type idxTy = builder.getIndexType();
|
||||
mlir::Type refTy = fir::ReferenceType::get(f32Ty);
|
||||
llvm::SmallVector<mlir::Value> values;
|
||||
for (unsigned i = 0; i < extent; ++i) {
|
||||
mlir::Value pos = builder.createIntegerConstant(loc, idxTy, i);
|
||||
mlir::Value coord = fir::CoordinateOp::create(builder, loc, refTy,
|
||||
fir::getBase(args[1]), pos);
|
||||
mlir::Value value = fir::LoadOp::create(builder, loc, coord);
|
||||
values.push_back(value);
|
||||
}
|
||||
|
||||
auto inlinePtx = mlir::NVVM::InlinePtxOp::create(
|
||||
builder, loc, {f32Ty, f32Ty, f32Ty, f32Ty},
|
||||
{ptr, values[0], values[1], values[2], values[3]}, {},
|
||||
"atom.add.v4.f32 {%0, %1, %2, %3}, [%4], {%5, %6, %7, %8};", {});
|
||||
|
||||
llvm::SmallVector<mlir::Value> results;
|
||||
results.push_back(inlinePtx.getResult(0));
|
||||
results.push_back(inlinePtx.getResult(1));
|
||||
results.push_back(inlinePtx.getResult(2));
|
||||
results.push_back(inlinePtx.getResult(3));
|
||||
|
||||
mlir::Type vecF32Ty = mlir::VectorType::get({extent}, f32Ty);
|
||||
mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecF32Ty);
|
||||
mlir::Type i32Ty = builder.getI32Type();
|
||||
for (unsigned i = 0; i < extent; ++i)
|
||||
undef = mlir::LLVM::InsertElementOp::create(
|
||||
builder, loc, undef, results[i],
|
||||
builder.createIntegerConstant(loc, i32Ty, i));
|
||||
|
||||
auto i128Ty = builder.getIntegerType(128);
|
||||
auto i128VecTy = mlir::VectorType::get({1}, i128Ty);
|
||||
mlir::Value vec128 =
|
||||
mlir::vector::BitCastOp::create(builder, loc, i128VecTy, undef);
|
||||
return mlir::vector::ExtractOp::create(builder, loc, vec128,
|
||||
mlir::ArrayRef<int64_t>{0});
|
||||
}
|
||||
|
||||
mlir::Value
|
||||
CUDAIntrinsicLibrary::genAtomicAnd(mlir::Type resultType,
|
||||
llvm::ArrayRef<mlir::Value> args) {
|
||||
|
||||
@@ -32,4 +32,4 @@ attributes(global) subroutine test_atomicadd_r4x4()
|
||||
end subroutine
|
||||
|
||||
! CHECK-LABEL: func.func @_QPtest_atomicadd_r4x4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
|
||||
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<4xf32>
|
||||
! CHECK: atom.add.v4.f32
|
||||
|
||||
Reference in New Issue
Block a user