[mlir][sparse] introduce sparse_tensor::unpack operation

An inverse operation of sparse_tensor::pack. Reviewed By: aartbik. Differential Revision: https://reviews.llvm.org/D143669
2026-01-19 01:15:50 +08:00 · 2023-02-09 19:08:36 +00:00
parent e74bb3471f
commit 6dbca86d83
5 changed files with 136 additions and 26 deletions
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h
@@ -56,7 +56,7 @@ SparseTensorEncodingAttr getSparseTensorEncoding(Type type);

 /// Returns true iff the given type is a type for a COO tensor with the last
 /// dimension level type being unique.
-bool isUniqueCOOType(RankedTensorType tp);
+bool isUniqueCOOType(TensorType tp);

 /// Returns the starting dimension for a trailing COO region that spans across
 /// at least two dimensions. If no such COO region is found, returns the rank
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -59,8 +59,8 @@ def SparseTensor_NewOp : SparseTensor_Op<"new", [Pure]>,
 }

 def SparseTensor_PackOp : SparseTensor_Op<"pack", [Pure]>,
-    Arguments<(ins AnyRankedTensor:$data,
-                   AnyRankedTensor:$indices)>,
+    Arguments<(ins 1DTensorOf<[AnyType]>:$data,
+                   2DTensorOf<[AnySignlessIntegerOrIndex]>:$indices)>,
    Results<(outs AnySparseTensor: $result)> {
  let summary = "Returns a sparse tensor from the given (data, indices) pair";

@@ -99,6 +99,46 @@ def SparseTensor_PackOp : SparseTensor_Op<"pack", [Pure]>,
  let hasVerifier = 1;
 }

+def SparseTensor_UnpackOp : SparseTensor_Op<"unpack", [Pure]>,
+    Arguments<(ins AnySparseTensor:$tensor)>,
+    Results<(outs 1DTensorOf<[AnyType]>:$data,
+                  2DTensorOf<[AnySignlessIntegerOrIndex]>:$indices,
+                  AnySignlessIntegerOrIndex:$nnz)> {
+  let summary = "Returns the (data, indices) pair unpacked from the input tensor";
+
+  let description = [{
+    Unpack is the inverse operation of `sparse_tensor::pack`. It returns the data and
+    indices extracted from a COO sparse tensor. It also returns an integer value
+    indicating the number of entries in the source tensor.
+
+    The operation can be used to return an unpacked MLIR sparse tensor to the frontend,
+    e.g., by returning two numpy arrays for data and indices.
+
+    The unpack operation ends the lifetime of the sparse tensor, and using the
+    tensor after the unpack is undefined behavior.
+
+    Example:
+    ```mlir
+    // input COO format |1.1, 0.0, 0.0, 0.0|
+    //    of 3x4 matrix |0.0, 0.0, 2.2, 3.3|
+    //                  |0.0, 0.0, 0.0, 0.0|
+    %data, %indices, %nnz = sparse_tensor.unpack %st
+                          : tensor<3x4xf64, #COO>
+                         to tensor<3xf64>, tensor<3x2xindex>, index
+
+    // %data    = arith.constant dense<[ 1.1,   2.2,   3.3 ]> : tensor<3xf64>
+    // %indices = arith.constant dense<[[0,0], [1,2], [1,3]]> : tensor<3x2xindex>
+    // %nnz = 3
+
+    ```
+  }];
+
+  let assemblyFormat = "$tensor attr-dict `:` type($tensor) "
+                                        "`to` type($data) `,` type($indices)`,` type($nnz)";
+
+  let hasVerifier = 1;
+}
+
 def SparseTensor_ConvertOp : SparseTensor_Op<"convert",
  [Pure, SameOperandsAndResultElementType]>,
    Arguments<(ins AnyTensor:$source)>,
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -424,7 +424,7 @@ static bool isCOOType(SparseTensorEncodingAttr enc, uint64_t s, bool isUnique) {
  return !isUnique || isUniqueDLT(getDimLevelType(enc, rank - 1));
 }

-bool mlir::sparse_tensor::isUniqueCOOType(RankedTensorType tp) {
+bool mlir::sparse_tensor::isUniqueCOOType(TensorType tp) {
  SparseTensorEncodingAttr enc = getSparseTensorEncoding(tp);
  return enc && isCOOType(enc, 0, /*isUnique=*/true);
 }
@@ -617,42 +617,49 @@ LogicalResult NewOp::verify() {
  return success();
 }

-LogicalResult PackOp::verify() {
-  TensorType dataTp = getData().getType(), idxTp = getIndices().getType();
-  TensorType retTp = getResult().getType();
+static LogicalResult verifyPackUnPack(Operation *op, TensorType cooTp,
+                                      TensorType dataTp, TensorType idxTp) {
+  if (!isUniqueCOOType(cooTp))
+    return op->emitError("must operate on a COO tensor");

-  if (!isUniqueCOOType(retTp.cast<RankedTensorType>()))
-    return emitError("must be packed into a COO tensor");
-
-  if (!retTp.hasStaticShape() || !dataTp.hasStaticShape() ||
-      !idxTp.hasStaticShape())
-    return emitError("all input types must be statically shaped");
-
-  if (dataTp.getRank() != 1 || idxTp.getRank() != 2) {
-    return emitError(
-        "requires rank 1 tensor for value and rank 2 tensor for indices");
-  }
-
-  auto enc = getSparseTensorEncoding(retTp);
+  auto enc = getSparseTensorEncoding(cooTp);
  if (idxTp.getElementType() != enc.getIndexType() ||
-      dataTp.getElementType() != retTp.getElementType())
-    return emitError("unmatched type between input and output");
+      dataTp.getElementType() != cooTp.getElementType())
+    return op->emitError("unmatched type between input and output");

  auto dNOE = dataTp.getShape()[0];
  auto iNOE = idxTp.getShape()[0];
  if (!ShapedType::isDynamic(dNOE) && !ShapedType::isDynamic(iNOE) &&
      dNOE != iNOE)
-    return emitError("unmatched number of elements in data and indices");
+    return op->emitError("unmatched number of elements in data and indices");

  // A tensor<?xNxi32> for indices means the input COO is rank N
  auto inRank = idxTp.getShape()[1];
-  auto ouRank = retTp.getRank();
+  auto ouRank = cooTp.getRank();
  if (!ShapedType::isDynamic(inRank) && inRank != ouRank)
-    return emitError("unmatched rank between input and output");
+    return op->emitError("unmatched rank between input and output");

  return success();
 }

+LogicalResult PackOp::verify() {
+  TensorType dataTp = getData().getType(), idxTp = getIndices().getType();
+  TensorType retTp = getResult().getType();
+
+  if (!retTp.hasStaticShape() || !dataTp.hasStaticShape() ||
+      !idxTp.hasStaticShape())
+    return emitError("all input types must be statically shaped");
+
+  return verifyPackUnPack(*this, retTp, dataTp, idxTp);
+}
+
+LogicalResult UnpackOp::verify() {
+  TensorType dataTp = getData().getType(), idxTp = getIndices().getType();
+  TensorType srcTp = getTensor().getType();
+
+  return verifyPackUnPack(*this, srcTp, dataTp, idxTp);
+}
+
 LogicalResult ConvertOp::verify() {
  if (auto tp1 = getSource().getType().dyn_cast<RankedTensorType>()) {
    if (auto tp2 = getDest().getType().dyn_cast<RankedTensorType>()) {
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -20,11 +20,23 @@ func.func @non_static_pack_ret(%data: tensor<6xf64>, %index: tensor<6x1xi32>)

 // -----

+#DenseVector = #sparse_tensor.encoding<{dimLevelType = ["dense"], indexBitWidth=32}>
+
+func.func @invalid_pack_dense(%data: tensor<6xf64>, %index: tensor<6x1xi32>)
+                            -> tensor<100xf64, #DenseVector> {
+  // expected-error@+1 {{must operate on a COO tensor}}
+  %0 = sparse_tensor.pack %data, %index : tensor<6xf64>, tensor<6x1xi32>
+                                       to tensor<100xf64, #DenseVector>
+  return %0 : tensor<100xf64, #DenseVector>
+}
+
+// -----
+
 #SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>

 func.func @invalid_pack_data(%data: tensor<6x1xf64>, %index: tensor<6x1xi32>)
                            -> tensor<100xf64, #SparseVector> {
-  // expected-error@+1 {{requires rank 1 tensor for value and rank 2 tensor for indices}}
+  // expected-error@+1 {{'sparse_tensor.pack' op operand #0 must be 1D tensor of any type values}}
  %0 = sparse_tensor.pack %data, %index : tensor<6x1xf64>, tensor<6x1xi32>
                                       to tensor<100xf64, #SparseVector>
  return %0 : tensor<100xf64, #SparseVector>
@@ -68,6 +80,42 @@ func.func @invalid_pack_type(%data: tensor<6xf64>, %index: tensor<6x2xi32>)

 // -----

+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_unpack_type(%sp: tensor<100xf32, #SparseVector>)
+                            -> (tensor<6xf64>, tensor<6x1xi32>, i32) {
+  // expected-error@+1 {{unmatched type between input and output}}
+  %d, %i, %n = sparse_tensor.unpack %sp :  tensor<100xf32, #SparseVector>
+                                        to tensor<6xf64>, tensor<6x1xi32>, i32
+  return %d, %i, %n : tensor<6xf64>, tensor<6x1xi32>, i32
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_unpack_type(%sp: tensor<100xf32, #SparseVector>)
+                            -> (tensor<5xf32>, tensor<6x1xi32>, i32) {
+  // expected-error@+1 {{unmatched number of elements in data and indices}}
+  %d, %i, %n = sparse_tensor.unpack %sp :  tensor<100xf32, #SparseVector>
+                                        to tensor<5xf32>, tensor<6x1xi32>, i32
+  return %d, %i, %n : tensor<5xf32>, tensor<6x1xi32>, i32
+}
+
+// -----
+
+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+func.func @invalid_unpack_type(%sp: tensor<100xf32, #SparseVector>)
+                            -> (tensor<6xf32>, tensor<6x2xi32>, i32) {
+  // expected-error@+1 {{unmatched rank between input and output}}
+  %d, %i, %n = sparse_tensor.unpack %sp :  tensor<100xf32, #SparseVector>
+                                        to tensor<6xf32>, tensor<6x2xi32>, i32
+  return %d, %i, %n : tensor<6xf32>, tensor<6x2xi32>, i32
+}
+
+// -----
+
 func.func @invalid_pointers_dense(%arg0: tensor<128xf64>) -> memref<?xindex> {
  // expected-error@+1 {{'sparse_tensor.pointers' op operand #0 must be sparse tensor of any type values, but got 'tensor<128xf64>'}}
  %0 = sparse_tensor.pointers %arg0 { dimension = 0 : index } : tensor<128xf64> to memref<?xindex>
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -29,6 +29,21 @@ func.func @sparse_pack(%data: tensor<6xf64>, %index: tensor<6x1xi32>)

 // -----

+#SparseVector = #sparse_tensor.encoding<{dimLevelType = ["compressed"], indexBitWidth=32}>
+
+// CHECK-LABEL: func @sparse_unpack(
+//  CHECK-SAME: %[[T:.*]]: tensor<100xf64, #
+//       CHECK: %[[D:.*]], %[[I:.*]], %[[N:.*]] = sparse_tensor.unpack %[[T]]
+//       CHECK: return %[[D]], %[[I]], %[[N]]
+func.func @sparse_unpack(%sp : tensor<100xf64, #SparseVector>)
+                       -> (tensor<6xf64>, tensor<6x1xi32>, i32) {
+  %data, %indices, %nnz = sparse_tensor.unpack %sp : tensor<100xf64, #SparseVector>
+                                                  to tensor<6xf64>, tensor<6x1xi32>, i32
+  return %data, %indices, %nnz : tensor<6xf64>, tensor<6x1xi32>, i32
+}
+
+// -----
+
 #SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>

 // CHECK-LABEL: func @sparse_new_symmetry(