[mlir][sparse][gpu] unify dnmat and dnvec handle and ops

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D152465
Author: Kun Wu
Date:   2023-06-09 16:58:44 +00:00
Parent: 853d704fd0
Commit: 97f4c22b3a

12 changed files with 164 additions and 271 deletions
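
With this change, the former `gpu.create_dn_vec`/`gpu.destroy_dn_vec` and `gpu.create_dn_mat`/`gpu.destroy_dn_mat` pairs collapse into a single `gpu.create_dn_tensor`/`gpu.destroy_dn_tensor` pair whose rank is implied by the variadic dimension operands. A minimal usage sketch, adapted from the updated tests further down (SSA names and shapes are illustrative):

```mlir
// 1-D case (formerly gpu.create_dn_vec / gpu.destroy_dn_vec).
%dnvec, %t1 = gpu.create_dn_tensor async [%t0] %env, %vec, %size : index into memref<?xf64>
%t2 = gpu.destroy_dn_tensor async [%t1] %dnvec

// 2-D case (formerly gpu.create_dn_mat / gpu.destroy_dn_mat).
%dnmat, %t3 = gpu.create_dn_tensor async [%t2] %env, %mat, %rows, %cols : index, index into memref<?x?xf64>
%t4 = gpu.destroy_dn_tensor async [%t3] %dnmat
```

During lowering to GPU runtime calls, the 1-D form still maps to the dense-vector runtime builders and the 2-D form to the dense-matrix (or cusparseLt) builders, dispatched on the number of dimension operands (see the conversion patterns below).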

@@ -116,17 +116,11 @@ def GPU_SparseEnvHandle :
"sparse environment handle type">,
BuildableType<"mlir::gpu::SparseEnvHandleType::get($_builder.getContext())">;
def GPU_SparseDnVecHandle :
def GPU_SparseDnTensorHandle :
DialectType<GPU_Dialect,
CPred<"llvm::isa<::mlir::gpu::SparseDnVecHandleType>($_self)">,
"dense vector handle type">,
BuildableType<"mlir::gpu::SparseDnVecHandleType::get($_builder.getContext())">;
def GPU_SparseDnMatHandle :
DialectType<GPU_Dialect,
CPred<"llvm::isa<::mlir::gpu::SparseDnMatHandleType>($_self)">,
"dense matrix handle type">,
BuildableType<"mlir::gpu::SparseDnMatHandleType::get($_builder.getContext())">;
CPred<"llvm::isa<::mlir::gpu::SparseDnTensorHandleType>($_self)">,
"dense tensor handle type">,
BuildableType<"mlir::gpu::SparseDnTensorHandleType::get($_builder.getContext())">;
def GPU_SparseSpMatHandle :
DialectType<GPU_Dialect,

@@ -165,7 +165,7 @@ public:
void addAsyncDependency(Operation *op, Value token);
// Handle types for sparse.
enum class SparseHandleKind { Env, DnVec, DnMat, SpMat };
enum class SparseHandleKind { Env, SpMat, DnTensor };
template <SparseHandleKind K>
class SparseHandleType
@@ -177,8 +177,7 @@ public:
};
using SparseEnvHandleType = SparseHandleType<SparseHandleKind::Env>;
using SparseDnVecHandleType = SparseHandleType<SparseHandleKind::DnVec>;
using SparseDnMatHandleType = SparseHandleType<SparseHandleKind::DnMat>;
using SparseDnTensorHandleType = SparseHandleType<SparseHandleKind::DnTensor>;
using SparseSpMatHandleType = SparseHandleType<SparseHandleKind::SpMat>;
} // namespace gpu
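
On the IR side, values of the unified handle print with a single type keyword (see getSparseHandleKeyword in GPUDialect.cpp below); a hypothetical signature just to show the printed form:

```mlir
// Replaces !gpu.sparse.dnvec_handle and !gpu.sparse.dnmat_handle.
func.func private @use_handle(%dn: !gpu.sparse.dntensor_handle)
```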

@@ -1597,73 +1597,13 @@ def GPU_DestroySparseEnvOp : GPU_Op<
}];
}
def GPU_CreateDnVecOp : GPU_Op<"create_dn_vec", [GPU_AsyncOpInterface]> {
let summary = "Create dense vector operation";
def GPU_CreateDnTensorOp : GPU_Op<"create_dn_tensor", [GPU_AsyncOpInterface, AttrSizedOperandSegments]> {
let summary = "Create dense tensor operation";
let description = [{
The `gpu.create_dn_vec` operation initializes a dense vector from
the given values buffer and size. The buffer must already be copied
from the host to the device prior to using this operation. The
operation returns a handle to the dense vector descriptor.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
that case, it returns a !gpu.async.token in addition to the environment.
Example:
```mlir
%dvec, %token = gpu.create_dn_vec async [%dep] %env, %mem, %size : memref<?xf64>
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
GPU_SparseEnvHandle:$env,
AnyMemRef:$memref,
Index:$size);
let results = (outs Res<GPU_SparseDnVecHandle>:$dvec,
Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$env `,` $memref `,` $size attr-dict `:` type($memref)
}];
}
def GPU_DestroyDnVecOp : GPU_Op<"destroy_dn_vec", [GPU_AsyncOpInterface]> {
let summary = "Destroy dense vector operation";
let description = [{
The `gpu.destroy_dn_vec` operation releases all resources of a dense
vector represented by a handle that was previously created by a
`gpu.create_dn_vec` operation.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
that case, it returns a !gpu.async.token in addition to the environment.
Example:
```mlir
%token = gpu.destroy_dn_vec async [%dep] %dvec
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Arg<GPU_SparseDnVecHandle>:$dvec);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$dvec attr-dict
}];
}
def GPU_CreateDnMatOp : GPU_Op<"create_dn_mat", [GPU_AsyncOpInterface]> {
let summary = "Create dense matrix operation";
let description = [{
The `gpu.create_dn_mat` operation initializes a dense matrix from
The `gpu.create_dn_tensor` operation initializes a dense tensor from
the given values buffer and sizes. The buffer must already be copied
from the host to the device prior to using this operation. The
operation returns a handle to the dense matrix descriptor.
operation returns a handle to the dense tensor descriptor.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
@@ -1672,29 +1612,28 @@ def GPU_CreateDnMatOp : GPU_Op<"create_dn_mat", [GPU_AsyncOpInterface]> {
Example:
```mlir
%dmat, %token = gpu.create_dn_mat async [%dep] %env, %rows, %cols, %mem : memref<?xf64>
%dmat, %token = gpu.create_dn_tensor async [%dep] %env, %mem, %dims : index, index into memref<?xf64>
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
GPU_SparseEnvHandle:$env,
Index:$rows,
Index:$cols,
AnyMemRef:$memref);
let results = (outs Res<GPU_SparseDnMatHandle>:$dmat, Optional<GPU_AsyncToken>:$asyncToken);
AnyMemRef:$memref,
Variadic<Index>:$dims);
let results = (outs Res<GPU_SparseDnTensorHandle>:$dnTensor, Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$env `,` $rows `,` $cols `,` $memref attr-dict `:` type($memref)
$env `,` $memref `,` $dims attr-dict `:` type($dims) `into` type($memref)
}];
}
def GPU_DestroyDnMatOp : GPU_Op<"destroy_dn_mat", [GPU_AsyncOpInterface]> {
let summary = "Destroy dense matrix operation";
def GPU_DestroyDnTensorOp : GPU_Op<"destroy_dn_tensor", [GPU_AsyncOpInterface]> {
let summary = "Destroy dense tensor operation";
let description = [{
The `gpu.destroy_dn_mat` operation releases all resources of a dense
matrix represented by a handle that was previously created by a
`gpu.create_dn_mat` operation.
The `gpu.destroy_dn_tensor` operation releases all resources of a dense
tensor represented by a handle that was previously created by a
`gpu.create_dn_tensor` operation.
If the `async` keyword is present, the op is executed asynchronously (i.e.
it does not block until the execution has finished on the device). In
@@ -1703,17 +1642,17 @@ def GPU_DestroyDnMatOp : GPU_Op<"destroy_dn_mat", [GPU_AsyncOpInterface]> {
Example:
```mlir
%token = gpu.destroy_dn_vec async [%dep] %dmat
%token = gpu.destroy_dn_tensor async [%dep] %dnTensor
```
}];
let arguments = (ins Variadic<GPU_AsyncToken>:$asyncDependencies,
Arg<GPU_SparseDnMatHandle>:$dmat);
Arg<GPU_SparseDnTensorHandle>:$dnTensor);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
let assemblyFormat = [{
custom<AsyncDependencies>(type($asyncToken), $asyncDependencies)
$dmat attr-dict
$dnTensor attr-dict
}];
}
@@ -1945,8 +1884,8 @@ def GPU_SpMVBufferSizeOp : GPU_Op<"spmv_buffer_size", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnVecHandle:$dnX,
GPU_SparseDnVecHandle:$dnY,
GPU_SparseDnTensorHandle:$dnX,
GPU_SparseDnTensorHandle:$dnY,
TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz,
Optional<GPU_AsyncToken>:$asyncToken);
@@ -1998,8 +1937,8 @@ def GPU_SpMVOp : GPU_Op<"spmv", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnVecHandle:$dnX,
GPU_SparseDnVecHandle:$dnY,
GPU_SparseDnTensorHandle:$dnX,
GPU_SparseDnTensorHandle:$dnY,
TypeAttr:$computeType,
AnyMemRef:$buffer);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
@@ -2052,8 +1991,8 @@ def GPU_SpMMBufferSizeOp : GPU_Op<"spmm_buffer_size", [GPU_AsyncOpInterface]> {
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnMatHandle:$dnmatC,
GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseDnTensorHandle:$dnmatC,
TypeAttr:$computeType);
let results = (outs Res<AnyTypeOf<[Index, TupleOf<[Index, Index,
Index]>]>>:$bufferSzs,
@@ -2108,8 +2047,8 @@ def GPU_SpMMOp : GPU_Op<"spmm", [GPU_AsyncOpInterface, AttrSizedOperandSegments]
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseSpMatHandle:$spmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnMatHandle:$dnmatC,
GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseDnTensorHandle:$dnmatC,
TypeAttr:$computeType,
Variadic<AnyMemRef>:$buffers);
let results = (outs Optional<GPU_AsyncToken>:$asyncToken);
@@ -2162,8 +2101,8 @@ def GPU_SDDMMBufferSizeOp : GPU_Op<"sddmm_buffer_size", [GPU_AsyncOpInterface]>
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseDnMatHandle:$dnmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnTensorHandle:$dnmatA,
GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
TypeAttr:$computeType);
let results = (outs Res<Index>:$bufferSz, Optional<GPU_AsyncToken>:$asyncToken);
@@ -2216,8 +2155,8 @@ def GPU_SDDMMOp : GPU_Op<"sddmm", [GPU_AsyncOpInterface]> {
GPU_SparseEnvHandle:$env,
GPU_TransposeModeAttr:$modeA,
GPU_TransposeModeAttr:$modeB,
GPU_SparseDnMatHandle:$dnmatA,
GPU_SparseDnMatHandle:$dnmatB,
GPU_SparseDnTensorHandle:$dnmatA,
GPU_SparseDnTensorHandle:$dnmatB,
GPU_SparseSpMatHandle:$spmatC,
TypeAttr:$computeType,
AnyMemRef:$buffer);

@@ -548,51 +548,31 @@ private:
ConversionPatternRewriter &rewriter) const override;
};
class ConvertCreateDnVecOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnVecOp> {
class ConvertCreateDnTensorOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnTensorOp> {
public:
ConvertCreateDnVecOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnVecOp>(typeConverter) {}
ConvertCreateDnTensorOpToGpuRuntimeCallPattern(
LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnTensorOp>(typeConverter) {
}
private:
LogicalResult
matchAndRewrite(gpu::CreateDnVecOp op, OpAdaptor adaptor,
matchAndRewrite(gpu::CreateDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
class ConvertDestroyDnVecOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnVecOp> {
class ConvertDestroyDnTensorOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnTensorOp> {
public:
ConvertDestroyDnVecOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnVecOp>(typeConverter) {}
ConvertDestroyDnTensorOpToGpuRuntimeCallPattern(
LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnTensorOp>(
typeConverter) {}
private:
LogicalResult
matchAndRewrite(gpu::DestroyDnVecOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
class ConvertCreateDnMatOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnMatOp> {
public:
ConvertCreateDnMatOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::CreateDnMatOp>(typeConverter) {}
private:
LogicalResult
matchAndRewrite(gpu::CreateDnMatOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
class ConvertDestroyDnMatOpToGpuRuntimeCallPattern
: public ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnMatOp> {
public:
ConvertDestroyDnMatOpToGpuRuntimeCallPattern(LLVMTypeConverter &typeConverter)
: ConvertOpToGpuRuntimeCallPattern<gpu::DestroyDnMatOp>(typeConverter) {}
private:
LogicalResult
matchAndRewrite(gpu::DestroyDnMatOp op, OpAdaptor adaptor,
matchAndRewrite(gpu::DestroyDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const override;
};
@@ -1474,102 +1454,90 @@ LogicalResult ConvertDestroySparseEnvOpToGpuRuntimeCallPattern::matchAndRewrite(
return success();
}
LogicalResult ConvertCreateDnVecOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::CreateDnVecOp op, OpAdaptor adaptor,
LogicalResult ConvertCreateDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::CreateDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
Value pVec =
Value pTensor =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
if (!getTypeConverter()->useOpaquePointers())
pVec = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pVec);
pTensor = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pTensor);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
auto handle =
createDnVecCallBuilder
.create(loc, rewriter, {adaptor.getSize(), pVec, dtp, stream})
.getResult();
rewriter.replaceOp(op, {handle, stream});
return success();
}
LogicalResult ConvertDestroyDnVecOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::DestroyDnVecOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
destroyDnVecCallBuilder.create(loc, rewriter, {adaptor.getDvec(), stream});
rewriter.replaceOp(op, {stream});
return success();
}
SmallVector<Value, 4> dims;
for (Value dim : adaptor.getDims()) {
dims.push_back(dim);
}
LogicalResult ConvertCreateDnMatOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::CreateDnMatOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
Value pMat =
MemRefDescriptor(adaptor.getMemref()).allocatedPtr(rewriter, loc);
if (!getTypeConverter()->useOpaquePointers())
pMat = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, pMat);
Type dType = op.getMemref().getType().getElementType();
auto dtp = genConstInt32From(rewriter, loc, getCuSparseDataTypeFrom(dType));
Value handle;
// TODO: For now, we track the use of the handle and lower it to cusparse /
// cusparseLt accordingly. If both cusparse and cusparseLt are used within a
// block, two separate creation ops are required for the logic to be correct.
// In the future, we may add support for using one handle in the sparse
// tensor / GPU dialect with both cusparse and cusparseLt. Use the cusparseLt
// create call if the dnmat is used with a spmat with 2:4 sparsity.
Value handle;
if (isSpMMCusparseLtOp(op.getDmat())) {
auto envHandle = adaptor.getEnv();
AssertSparseLTDnMatHandleSizeCallBuilder.create(loc, rewriter, {});
auto handleSz = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(11032));
handle = rewriter.create<LLVM::AllocaOp>(loc, llvmInt8PointerType,
llvmInt8Type, handleSz);
handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
if (dims.size() == 2) {
if (isSpMMCusparseLtOp(op.getDnTensor())) {
auto envHandle = adaptor.getEnv();
AssertSparseLTDnMatHandleSizeCallBuilder.create(loc, rewriter, {});
auto handleSz = rewriter.create<LLVM::ConstantOp>(
loc, getIndexType(), rewriter.getIndexAttr(11032));
handle = rewriter.create<LLVM::AllocaOp>(loc, llvmInt8PointerType,
llvmInt8Type, handleSz);
handle = rewriter.create<LLVM::BitcastOp>(loc, llvmPointerType, handle);
createLtDnMatCallBuilder
.create(loc, rewriter,
{handle, envHandle, adaptor.getRows(), adaptor.getCols(), pMat,
dtp, stream})
.getResult();
createLtDnMatCallBuilder
.create(loc, rewriter,
{handle, envHandle, dims[0], dims[1], pTensor, dtp, stream})
.getResult();
} else {
handle =
createDnMatCallBuilder
.create(loc, rewriter, {dims[0], dims[1], pTensor, dtp, stream})
.getResult();
}
} else {
handle =
createDnMatCallBuilder
.create(loc, rewriter,
{adaptor.getRows(), adaptor.getCols(), pMat, dtp, stream})
.getResult();
assert(dims.size() == 1 && "Only 1D and 2D tensors are supported");
handle = createDnVecCallBuilder
.create(loc, rewriter, {dims[0], pTensor, dtp, stream})
.getResult();
}
rewriter.replaceOp(op, {handle, stream});
return success();
}
LogicalResult ConvertDestroyDnMatOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::DestroyDnMatOp op, OpAdaptor adaptor,
LogicalResult ConvertDestroyDnTensorOpToGpuRuntimeCallPattern::matchAndRewrite(
gpu::DestroyDnTensorOp op, OpAdaptor adaptor,
ConversionPatternRewriter &rewriter) const {
if (failed(areAllLLVMTypes(op, adaptor.getOperands(), rewriter)) ||
failed(isAsyncWithOneDependency(rewriter, op)))
return failure();
Location loc = op.getLoc();
auto stream = adaptor.getAsyncDependencies().front();
// Use the cusparseLt destroy call if the dnmat is used with spmat with
// 2:4 sparsity
if (isSpMMCusparseLtOp(op.getDmat())) {
destroyCuSparseLtDnMatBuilder.create(loc, rewriter,
{adaptor.getDmat(), stream});
auto definingOp = op.getDnTensor().getDefiningOp<gpu::CreateDnTensorOp>();
SmallVector<Value, 4> dims;
for (Value dim : definingOp.getDims()) {
dims.push_back(dim);
}
if (dims.size() == 2) {
// Use the cusparseLt destroy call if the dnmat is used with spmat with
// 2:4 sparsity
if (isSpMMCusparseLtOp(op.getDnTensor())) {
destroyCuSparseLtDnMatBuilder.create(loc, rewriter,
{adaptor.getDnTensor(), stream});
} else {
destroyDnMatCallBuilder.create(loc, rewriter,
{adaptor.getDnTensor(), stream});
}
} else {
destroyDnMatCallBuilder.create(loc, rewriter, {adaptor.getDmat(), stream});
assert(dims.size() == 1 && "Only 1D and 2D tensors are supported");
destroyDnVecCallBuilder.create(loc, rewriter,
{adaptor.getDnTensor(), stream});
}
rewriter.replaceOp(op, {stream});
return success();
@@ -1914,8 +1882,7 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
StringRef gpuBinaryAnnotation,
bool kernelBarePtrCallConv) {
addOpaquePointerConversion<gpu::AsyncTokenType>(converter);
addOpaquePointerConversion<gpu::SparseDnVecHandleType>(converter);
addOpaquePointerConversion<gpu::SparseDnMatHandleType>(converter);
addOpaquePointerConversion<gpu::SparseDnTensorHandleType>(converter);
addOpaquePointerConversion<gpu::SparseSpMatHandleType>(converter);
addOpaquePointerConversion<gpu::SparseEnvHandleType>(converter);
@@ -1931,10 +1898,8 @@ void mlir::populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
ConvertAsyncYieldToGpuRuntimeCallPattern,
ConvertCreateSparseEnvOpToGpuRuntimeCallPattern,
ConvertDestroySparseEnvOpToGpuRuntimeCallPattern,
ConvertCreateDnVecOpToGpuRuntimeCallPattern,
ConvertDestroyDnVecOpToGpuRuntimeCallPattern,
ConvertCreateDnMatOpToGpuRuntimeCallPattern,
ConvertDestroyDnMatOpToGpuRuntimeCallPattern,
ConvertCreateDnTensorOpToGpuRuntimeCallPattern,
ConvertDestroyDnTensorOpToGpuRuntimeCallPattern,
ConvertCreateCooOpToGpuRuntimeCallPattern,
ConvertCreateCooAoSOpToGpuRuntimeCallPattern,
ConvertCreateCsrOpToGpuRuntimeCallPattern,

@@ -147,8 +147,7 @@ void GPUDialect::initialize() {
addTypes<AsyncTokenType>();
addTypes<MMAMatrixType>();
addTypes<SparseEnvHandleType>();
addTypes<SparseDnVecHandleType>();
addTypes<SparseDnMatHandleType>();
addTypes<SparseDnTensorHandleType>();
addTypes<SparseSpMatHandleType>();
addOperations<
#define GET_OP_LIST
@@ -165,10 +164,8 @@ static std::string getSparseHandleKeyword(SparseHandleKind kind) {
switch (kind) {
case SparseHandleKind::Env:
return "sparse.env_handle";
case SparseHandleKind::DnVec:
return "sparse.dnvec_handle";
case SparseHandleKind::DnMat:
return "sparse.dnmat_handle";
case SparseHandleKind::DnTensor:
return "sparse.dntensor_handle";
case SparseHandleKind::SpMat:
return "sparse.spmat_handle";
}
@@ -221,10 +218,8 @@ Type GPUDialect::parseType(DialectAsmParser &parser) const {
if (keyword == getSparseHandleKeyword(SparseHandleKind::Env))
return SparseEnvHandleType::get(context);
if (keyword == getSparseHandleKeyword(SparseHandleKind::DnVec))
return SparseDnVecHandleType::get(context);
if (keyword == getSparseHandleKeyword(SparseHandleKind::DnMat))
return SparseDnMatHandleType::get(context);
if (keyword == getSparseHandleKeyword(SparseHandleKind::DnTensor))
return SparseDnTensorHandleType::get(context);
if (keyword == getSparseHandleKeyword(SparseHandleKind::SpMat))
return SparseSpMatHandleType::get(context);
@@ -238,10 +233,9 @@ void GPUDialect::printType(Type type, DialectAsmPrinter &os) const {
.Case<AsyncTokenType>([&](Type) { os << "async.token"; })
.Case<SparseEnvHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::Env); })
.Case<SparseDnVecHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::DnVec); })
.Case<SparseDnMatHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::DnMat); })
.Case<SparseDnTensorHandleType>([&](Type) {
os << getSparseHandleKeyword(SparseHandleKind::DnTensor);
})
.Case<SparseSpMatHandleType>(
[&](Type) { os << getSparseHandleKeyword(SparseHandleKind::SpMat); })
.Case<MMAMatrixType>([&](MMAMatrixType fragTy) {

@@ -450,7 +450,7 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
// Create sparse environment and sparse matrix/dense vector handles.
Type indexTp = rewriter.getIndexType();
Type envHandleTp = rewriter.getType<gpu::SparseEnvHandleType>();
Type dnVecHandleTp = rewriter.getType<gpu::SparseDnVecHandleType>();
Type dnTensorHandleTp = rewriter.getType<gpu::SparseDnTensorHandleType>();
Type spmatHandleTp = rewriter.getType<gpu::SparseSpMatHandleType>();
Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
Value token = genFirstWait(rewriter, loc);
@@ -463,12 +463,12 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
rowA, colA, valA, isCOO, enableRT);
Value spMatA = spGenA->getResult(0);
token = spGenA->getResult(1);
auto dvecX = rewriter.create<gpu::CreateDnVecOp>(loc, dnVecHandleTp, tokenTp,
token, handle, vecX, szX);
auto dvecX = rewriter.create<gpu::CreateDnTensorOp>(
loc, dnTensorHandleTp, tokenTp, token, handle, vecX, szX);
Value dnX = dvecX.getResult(0);
token = dvecX.getAsyncToken();
auto dvecY = rewriter.create<gpu::CreateDnVecOp>(loc, dnVecHandleTp, tokenTp,
token, handle, vecY, szY);
auto dvecY = rewriter.create<gpu::CreateDnTensorOp>(
loc, dnTensorHandleTp, tokenTp, token, handle, vecY, szY);
Value dnY = dvecY.getResult(0);
token = dvecY.getAsyncToken();
@@ -493,9 +493,9 @@ static LogicalResult rewriteSpMV(PatternRewriter &rewriter,
// Copy data back to host and free all the resources.
token = rewriter.create<gpu::DestroySpMatOp>(loc, tokenTp, token, spMatA)
.getAsyncToken();
token = rewriter.create<gpu::DestroyDnVecOp>(loc, tokenTp, token, dnX)
token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnX)
.getAsyncToken();
token = rewriter.create<gpu::DestroyDnVecOp>(loc, tokenTp, token, dnY)
token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnY)
.getAsyncToken();
token = rewriter.create<gpu::DestroySparseEnvOp>(loc, tokenTp, token, handle)
.getAsyncToken();
@@ -557,7 +557,7 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
// Create sparse environment and sparse matrix/dense matrix handles.
Type indexTp = rewriter.getIndexType();
Type envHandleTp = rewriter.getType<gpu::SparseEnvHandleType>();
Type dnMatHandleTp = rewriter.getType<gpu::SparseDnMatHandleType>();
Type dnTensorHandleTp = rewriter.getType<gpu::SparseDnTensorHandleType>();
Type spMatHandleTp = rewriter.getType<gpu::SparseSpMatHandleType>();
Type tokenTp = rewriter.getType<gpu::AsyncTokenType>();
Value token = genFirstWait(rewriter, loc);
@@ -570,12 +570,14 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
rowA, colA, valA, isCOO, enableRT);
Value spMatA = spGenA->getResult(0);
token = spGenA->getResult(1);
auto dmatB = rewriter.create<gpu::CreateDnMatOp>(
loc, dnMatHandleTp, tokenTp, token, handle, szk, szn, matB);
auto dmatB = rewriter.create<gpu::CreateDnTensorOp>(
loc, dnTensorHandleTp, tokenTp, token, handle, matB,
SmallVector<Value>{szk, szn});
Value dnB = dmatB.getResult(0);
token = dmatB.getAsyncToken();
auto dmatC = rewriter.create<gpu::CreateDnMatOp>(
loc, dnMatHandleTp, tokenTp, token, handle, szm, szn, matC);
auto dmatC = rewriter.create<gpu::CreateDnTensorOp>(
loc, dnTensorHandleTp, tokenTp, token, handle, matC,
SmallVector<Value>{szm, szn});
Value dnC = dmatC.getResult(0);
token = dmatC.getAsyncToken();
@@ -602,9 +604,9 @@ static LogicalResult rewriteSpMM(PatternRewriter &rewriter,
// Copy data back to host and free all the resources.
token = rewriter.create<gpu::DestroySpMatOp>(loc, tokenTp, token, spMatA)
.getAsyncToken();
token = rewriter.create<gpu::DestroyDnMatOp>(loc, tokenTp, token, dnB)
token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnB)
.getAsyncToken();
token = rewriter.create<gpu::DestroyDnMatOp>(loc, tokenTp, token, dnC)
token = rewriter.create<gpu::DestroyDnTensorOp>(loc, tokenTp, token, dnC)
.getAsyncToken();
token = rewriter.create<gpu::DestroySparseEnvOp>(loc, tokenTp, token, handle)
.getAsyncToken();

@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf16>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_2to4_spmat async [%token3] %env, %arg0, %arg0, %mem1: memref<?xf16>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf16>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf16>
%bufferSzs, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : tuple<index,index,index> into f16
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2, %mem2, %mem2 : memref<?xf16>,memref<?xf16>,memref<?xf16> into f16
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return

@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnvec, %token5 = gpu.create_dn_vec async [%token4] %env, %mem2, %arg0 : memref<?xf64>
%dnvec, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0 : index into memref<?xf64>
%bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
%token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_vec async [%token8] %dnvec
%token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -52,11 +52,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : index into f64
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -82,11 +82,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
%token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return

@@ -332,14 +332,14 @@ module attributes {gpu.container_module} {
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: gpu.create_csr async
%spmat2, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: gpu.create_dn_vec async
%dnvec, %token6 = gpu.create_dn_vec async [%token5] %env, %mem2, %arg0 : memref<?xf64>
// CHECK: gpu.create_dn_tensor async
%dnvec, %token6 = gpu.create_dn_tensor async [%token5] %env, %mem2, %arg0 : index into memref<?xf64>
// CHECK: gpu.spmv_buffer_size async
%bufferSz, %token7 = gpu.spmv_buffer_size async [%token6] %env, %spmat, %dnvec, %dnvec into f64
// CHECK: gpu.spmv async
%token8 = gpu.spmv async [%token7] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
// CHECK: gpu.create_dn_mat async
%dnmat, %token9 = gpu.create_dn_mat async [%token8] %env, %arg0, %arg0, %mem2 : memref<?xf64>
// CHECK: gpu.create_dn_tensor async
%dnmat, %token9 = gpu.create_dn_tensor async [%token8] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
// CHECK: gpu.spmm_buffer_size async
%bufferSz2, %token10 = gpu.spmm_buffer_size async [%token9] %env, %spmat, %dnmat, %dnmat : index into f64
// CHECK: gpu.spmm async
@@ -348,12 +348,12 @@ module attributes {gpu.container_module} {
%bufferSz3, %token12 = gpu.sddmm_buffer_size async [%token11] %env, %dnmat, %dnmat, %spmat into f64
// CHECK: gpu.sddmm async
%token13 = gpu.sddmm async [%token12] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
// CHECK: gpu.destroy_dn_mat async
%token14 = gpu.destroy_dn_mat async [%token13] %dnmat
// CHECK: gpu.destroy_dn_tensor async
%token14 = gpu.destroy_dn_tensor async [%token13] %dnmat
// CHECK: gpu.destroy_sp_mat async
%token15 = gpu.destroy_sp_mat async [%token14] %spmat
// CHECK: gpu.destroy_dn_vec async
%token16 = gpu.destroy_dn_vec async [%token15] %dnvec
// CHECK: gpu.destroy_dn_tensor async
%token16 = gpu.destroy_dn_tensor async [%token15] %dnvec
// CHECK: gpu.destroy_sparse_env async
%token17 = gpu.destroy_sparse_env async [%token16] %env
// CHECK: gpu.wait

@@ -8,11 +8,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_coo async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_vec async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}} : index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.spmv_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.spmv async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_vec async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -22,11 +22,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_coo async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnvec, %token5 = gpu.create_dn_vec async [%token4] %env, %mem2, %arg0 : memref<?xf64>
%dnvec, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0 : index into memref<?xf64>
%bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %env, %spmat, %dnvec, %dnvec into f64
%token7 = gpu.spmv async [%token6] %env, %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_vec async [%token8] %dnvec
%token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -38,11 +38,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.spmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.spmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -52,11 +52,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %env, %spmat, %dnmat, %dnmat : index into f64
%token7 = gpu.spmm async [%token6] %env, %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return
@@ -68,11 +68,11 @@ module attributes {gpu.container_module} {
// CHECK: %{{.*}}, %{{.*}} = gpu.alloc async [%{{.*}}] (%{{.*}}) : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_sparse_env async [%{{.*}}]
// CHECK: %{{.*}}, %{{.*}} = gpu.create_csr async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_mat async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.create_dn_tensor async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : index, index into memref<?xf64>
// CHECK: %{{.*}}, %{{.*}} = gpu.sddmm_buffer_size async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} into f64
// CHECK: %{{.*}} = gpu.sddmm async [%{{.*}}] %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : memref<?xf64> into f64
// CHECK: %{{.*}} = gpu.destroy_sp_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_mat async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_dn_tensor async [%{{.*}}] %{{.*}}
// CHECK: %{{.*}} = gpu.destroy_sparse_env async [%{{.*}}] %{{.*}}
// CHECK: gpu.wait [%{{.*}}]
// CHECK: return
@@ -82,11 +82,11 @@ module attributes {gpu.container_module} {
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%env, %token3 = gpu.create_sparse_env async [%token2]
%spmat, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_mat async [%token4] %env, %arg0, %arg0, %mem2 : memref<?xf64>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %env, %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %env, %dnmat, %dnmat, %spmat into f64
%token7 = gpu.sddmm async [%token6] %env, %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_mat async [%token8] %dnmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
%token10 = gpu.destroy_sparse_env async [%token9] %env
gpu.wait [%token10]
return

@@ -47,14 +47,14 @@
// CHECK: %[[VAL_41:.*]] = gpu.wait async
// CHECK: %[[VAL_42:.*]], %[[VAL_43:.*]] = gpu.create_sparse_env async {{\[}}%[[VAL_41]]]
// CHECK: %[[VAL_44:.*]], %[[VAL_45:.*]] = gpu.create_csr async {{\[}}%[[VAL_43]]] %[[VAL_6]], %[[VAL_7]], %[[VAL_5]], %[[VAL_14]], %[[VAL_19]], %[[VAL_24]] : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %[[VAL_46:.*]], %[[VAL_47:.*]] = gpu.create_dn_mat async {{\[}}%[[VAL_45]]] %[[VAL_42]], %[[VAL_7]], %[[VAL_8]], %[[VAL_31]] : memref<?x?xf64>
// CHECK: %[[VAL_48:.*]], %[[VAL_49:.*]] = gpu.create_dn_mat async {{\[}}%[[VAL_47]]] %[[VAL_42]], %[[VAL_6]], %[[VAL_8]], %[[VAL_38]] : memref<?x?xf64>
// CHECK: %[[VAL_46:.*]], %[[VAL_47:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_45]]] %[[VAL_42]], %[[VAL_31]], %[[VAL_7]], %[[VAL_8]] : index, index into memref<?x?xf64>
// CHECK: %[[VAL_48:.*]], %[[VAL_49:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_47]]] %[[VAL_42]], %[[VAL_38]], %[[VAL_6]], %[[VAL_8]] : index, index into memref<?x?xf64>
// CHECK: %[[VAL_50:.*]], %[[VAL_51:.*]] = gpu.spmm_buffer_size async {{\[}}%[[VAL_49]]] %[[VAL_42]], %[[VAL_44]], %[[VAL_46]], %[[VAL_48]] : index
// CHECK: %[[VAL_52:.*]], %[[VAL_53:.*]] = gpu.alloc async {{\[}}%[[VAL_51]]] (%[[VAL_50]]) : memref<?xi8>
// CHECK: %[[VAL_54:.*]] = gpu.spmm async {{\[}}%[[VAL_53]]] %[[VAL_42]], %[[VAL_44]], %[[VAL_46]], %[[VAL_48]], %[[VAL_52]] : memref<?xi8>
// CHECK: %[[VAL_55:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_54]]] %[[VAL_44]]
// CHECK: %[[VAL_56:.*]] = gpu.destroy_dn_mat async {{\[}}%[[VAL_55]]] %[[VAL_46]]
// CHECK: %[[VAL_57:.*]] = gpu.destroy_dn_mat async {{\[}}%[[VAL_56]]] %[[VAL_48]]
// CHECK: %[[VAL_56:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_55]]] %[[VAL_46]]
// CHECK: %[[VAL_57:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_56]]] %[[VAL_48]]
// CHECK: %[[VAL_58:.*]] = gpu.destroy_sparse_env async {{\[}}%[[VAL_57]]] %[[VAL_42]]
// CHECK: %[[VAL_59:.*]] = gpu.dealloc async {{\[}}%[[VAL_58]]] %[[VAL_14]] : memref<?xindex>
// CHECK: %[[VAL_60:.*]] = gpu.dealloc async {{\[}}%[[VAL_59]]] %[[VAL_19]] : memref<?xindex>

@@ -45,14 +45,14 @@ module {
// CHECK: %[[VAL_38:.*]] = gpu.wait async
// CHECK: %[[VAL_39:.*]], %[[VAL_40:.*]] = gpu.create_sparse_env async {{\[}}%[[VAL_38]]]
// CHECK: %[[VAL_41:.*]], %[[VAL_42:.*]] = gpu.create_coo async {{\[}}%[[VAL_40]]] %[[VAL_6]], %[[VAL_7]], %[[VAL_5]], %[[VAL_13]], %[[VAL_18]], %[[VAL_23]] : memref<?xindex>, memref<?xindex>, memref<?xf64>
// CHECK: %[[VAL_43:.*]], %[[VAL_44:.*]] = gpu.create_dn_vec async {{\[}}%[[VAL_42]]] %[[VAL_39:.*]], %[[VAL_29]], %[[VAL_7]] : memref<?xf64>
// CHECK: %[[VAL_45:.*]], %[[VAL_46:.*]] = gpu.create_dn_vec async {{\[}}%[[VAL_44]]] %[[VAL_39:.*]], %[[VAL_35]], %[[VAL_6]] : memref<?xf64>
// CHECK: %[[VAL_43:.*]], %[[VAL_44:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_42]]] %[[VAL_39:.*]], %[[VAL_29]], %[[VAL_7]] : index into memref<?xf64>
// CHECK: %[[VAL_45:.*]], %[[VAL_46:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_44]]] %[[VAL_39:.*]], %[[VAL_35]], %[[VAL_6]] : index into memref<?xf64>
// CHECK: %[[VAL_47:.*]], %[[VAL_48:.*]] = gpu.spmv_buffer_size async {{\[}}%[[VAL_46]]] %[[VAL_39]], %[[VAL_41]], %[[VAL_43]], %[[VAL_45]]
// CHECK: %[[VAL_49:.*]], %[[VAL_50:.*]] = gpu.alloc async {{\[}}%[[VAL_48]]] (%[[VAL_47]]) : memref<?xi8>
// CHECK: %[[VAL_51:.*]] = gpu.spmv async {{\[}}%[[VAL_50]]] %[[VAL_39]], %[[VAL_41]], %[[VAL_43]], %[[VAL_45]], %[[VAL_49]] : memref<?xi8>
// CHECK: %[[VAL_52:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_51]]] %[[VAL_41]]
// CHECK: %[[VAL_53:.*]] = gpu.destroy_dn_vec async {{\[}}%[[VAL_52]]] %[[VAL_43]]
// CHECK: %[[VAL_54:.*]] = gpu.destroy_dn_vec async {{\[}}%[[VAL_53]]] %[[VAL_45]]
// CHECK: %[[VAL_53:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_52]]] %[[VAL_43]]
// CHECK: %[[VAL_54:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_53]]] %[[VAL_45]]
// CHECK: %[[VAL_55:.*]] = gpu.destroy_sparse_env async {{\[}}%[[VAL_54]]] %[[VAL_39]]
// CHECK: %[[VAL_56:.*]] = gpu.dealloc async {{\[}}%[[VAL_55]]] %[[VAL_13]] : memref<?xindex>
// CHECK: %[[VAL_57:.*]] = gpu.dealloc async {{\[}}%[[VAL_56]]] %[[VAL_18]] : memref<?xindex>