2021-04-29 14:31:18 -07:00
|
|
|
//===- SparseTensorDialect.cpp - Sparse tensor dialect implementation -----===//
|
|
|
|
|
//
|
|
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
|
|
|
//
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2022-11-03 20:33:56 +00:00
|
|
|
#include <utility>
|
|
|
|
|
|
2021-04-29 14:31:18 -07:00
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorType.h"
|
2022-02-11 21:18:20 -08:00
|
|
|
|
2022-09-29 11:14:47 -04:00
|
|
|
#include "mlir/Dialect/Arith/IR/Arith.h"
|
2021-04-29 14:31:18 -07:00
|
|
|
#include "mlir/IR/Builders.h"
|
2021-04-30 18:07:28 -07:00
|
|
|
#include "mlir/IR/DialectImplementation.h"
|
2022-01-24 11:41:00 -08:00
|
|
|
#include "mlir/IR/Matchers.h"
|
2021-04-29 14:31:18 -07:00
|
|
|
#include "mlir/IR/OpImplementation.h"
|
2023-02-02 23:34:28 +00:00
|
|
|
#include "mlir/IR/PatternMatch.h"
|
2021-04-30 18:07:28 -07:00
|
|
|
#include "llvm/ADT/TypeSwitch.h"
|
2022-08-03 19:23:42 +00:00
|
|
|
#include "llvm/Support/FormatVariadic.h"
|
2021-04-29 14:31:18 -07:00
|
|
|
|
2022-12-13 19:49:43 +00:00
|
|
|
#define GET_ATTRDEF_CLASSES
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrEnums.cpp.inc"
|
|
|
|
|
|
|
|
|
|
#define GET_TYPEDEF_CLASSES
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc"
|
|
|
|
|
|
2021-04-29 14:31:18 -07:00
|
|
|
using namespace mlir;
|
|
|
|
|
using namespace mlir::sparse_tensor;
|
|
|
|
|
|
2023-01-24 13:23:52 -08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// Additional convenience methods.
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
/// Gets the dimension-rank of the type of some `T`. (In particular
|
|
|
|
|
/// this is only used for `Value` and `TypedValue<RankedTensorType>`.)
|
2023-01-24 13:23:52 -08:00
|
|
|
template <typename T>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
static inline Dimension getDimRank(T t) {
|
2023-01-24 13:23:52 -08:00
|
|
|
return getRankedTensorType(t).getRank();
|
|
|
|
|
}
|
|
|
|
|
|
2021-04-30 18:07:28 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
2021-05-10 10:34:21 -07:00
|
|
|
// TensorDialect Attribute Methods.
|
2021-04-30 18:07:28 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
/// Returns true iff `bitWidth` is one of the accepted widths:
/// 0, 8, 16, 32, or 64.  Every other value is rejected.
static bool acceptBitWidth(unsigned bitWidth) {
  return bitWidth == 0 || bitWidth == 8 || bitWidth == 16 || bitWidth == 32 ||
         bitWidth == 64;
}
|
|
|
|
|
|
2022-12-27 22:10:37 +00:00
|
|
|
/// Prints the slice as `(offset, size, stride)`.  Each component is printed
/// as its decimal value when the corresponding `getStatic*()` getter yields
/// a value, and as `?` otherwise.
void SparseTensorDimSliceAttr::print(AsmPrinter &printer) const {
  // Renders one component: the decimal value if present, "?" if absent.
  const auto render = [](const auto &component) -> std::string {
    if (component)
      return std::to_string(*component);
    return "?";
  };
  printer << "(" << render(getStaticOffset()) << ", "
          << render(getStaticSize()) << ", " << render(getStaticStride())
          << ")";
}
|
|
|
|
|
|
|
|
|
|
/// Parses a single slice component, which is either an integer or `?`.
///
/// If an integer is present it is stored in `result`; a successfully parsed
/// negative integer is rejected with a diagnostic.  If no integer is
/// present, `result` is set to `SparseTensorDimSliceAttr::kDynamic` and a
/// `?` token is required instead.
static ParseResult parseOptionalStaticSlice(int64_t &result,
                                            AsmParser &parser) {
  auto maybeInteger = parser.parseOptionalInteger(result);

  // No integer at this position: the component must be the dynamic
  // marker `?`.
  if (!maybeInteger.has_value()) {
    result = SparseTensorDimSliceAttr::kDynamic;
    return parser.parseQuestion();
  }

  // An integer was parsed successfully, but negative values are not allowed.
  if (maybeInteger.value().succeeded() && result < 0) {
    parser.emitError(
        parser.getCurrentLocation(),
        "expect positive value or ? for slice offset/size/stride");
    return failure();
  }

  // Otherwise forward the outcome of the integer parse as-is.
  return maybeInteger.value();
}
|
|
|
|
|
|
|
|
|
|
/// Parses `(` offset `,` size `,` stride `)`, where each component is
/// handled by `parseOptionalStaticSlice`.  Returns a null attribute as soon
/// as any piece fails to parse; otherwise builds the attribute through
/// `getChecked`.
Attribute SparseTensorDimSliceAttr::parse(AsmParser &parser, Type type) {
  int64_t offset = -1;
  int64_t size = -1;
  int64_t stride = -1;

  if (failed(parser.parseLParen()))
    return {};
  if (failed(parseOptionalStaticSlice(offset, parser)) ||
      failed(parser.parseComma()))
    return {};
  if (failed(parseOptionalStaticSlice(size, parser)) ||
      failed(parser.parseComma()))
    return {};
  if (failed(parseOptionalStaticSlice(stride, parser)) ||
      failed(parser.parseRParen()))
    return {};

  return parser.getChecked<SparseTensorDimSliceAttr>(parser.getContext(),
                                                     offset, size, stride);
}
|
|
|
|
|
|
|
|
|
|
/// Verifies the slice components.  Each component may be
/// `SparseTensorDimSliceAttr::kDynamic`; otherwise the offset must be
/// `>= 0`, and the size and stride must each be `> 0`.  Emits a diagnostic
/// through `emitError` when any component is invalid.
LogicalResult
SparseTensorDimSliceAttr::verify(function_ref<InFlightDiagnostic()> emitError,
                                 int64_t offset, int64_t size, int64_t stride) {
  const bool offsetOk =
      offset == SparseTensorDimSliceAttr::kDynamic || offset >= 0;
  const bool sizeOk = size == SparseTensorDimSliceAttr::kDynamic || size > 0;
  const bool strideOk =
      stride == SparseTensorDimSliceAttr::kDynamic || stride > 0;

  if (!(offsetOk && sizeOk && strideOk))
    return emitError()
           << "expect positive value or ? for slice offset/size/stride";
  return success();
}
|
|
|
|
|
|
2023-02-15 13:31:05 -08:00
|
|
|
/// Maps a bitwidth to a type: a bitwidth of zero yields `IndexType`, and any
/// non-zero bitwidth yields an `IntegerType` of that width.
Type mlir::sparse_tensor::detail::getIntegerOrIndexType(MLIRContext *ctx,
                                                        unsigned bitwidth) {
  if (bitwidth == 0)
    return IndexType::get(ctx);
  return IntegerType::get(ctx, bitwidth);
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
/// Returns the type selected by this encoding's position width, as computed
/// by `detail::getIntegerOrIndexType`.
Type SparseTensorEncodingAttr::getPosType() const {
  const auto posWidth = getPosWidth();
  return detail::getIntegerOrIndexType(getContext(), posWidth);
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
/// Returns the type selected by this encoding's coordinate width, as
/// computed by `detail::getIntegerOrIndexType`.
Type SparseTensorEncodingAttr::getCrdType() const {
  const auto crdWidth = getCrdWidth();
  return detail::getIntegerOrIndexType(getContext(), crdWidth);
}
|
|
|
|
|
|
2022-12-15 14:32:58 -08:00
|
|
|
SparseTensorEncodingAttr SparseTensorEncodingAttr::withoutOrdering() const {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
return SparseTensorEncodingAttr::get(getContext(), getDimLevelType(),
|
|
|
|
|
AffineMap(), AffineMap(), getPosWidth(),
|
|
|
|
|
getCrdWidth());
|
2022-12-15 14:32:58 -08:00
|
|
|
}
|
|
|
|
|
|
2023-02-22 19:04:02 +00:00
|
|
|
/// Builds an encoding with the same level types and orderings as this one,
/// but with zero passed for both trailing width arguments.
SparseTensorEncodingAttr SparseTensorEncodingAttr::withoutBitWidths() const {
  const auto lvlTypes = getDimLevelType();
  const auto dimOrdering = getDimOrdering();
  const auto higherOrdering = getHigherOrdering();
  return SparseTensorEncodingAttr::get(getContext(), lvlTypes, dimOrdering,
                                       higherOrdering, /*posWidth=*/0,
                                       /*crdWidth=*/0);
}
|
|
|
|
|
|
2022-12-16 18:12:05 +00:00
|
|
|
bool SparseTensorEncodingAttr::isAllDense() const {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return !getImpl() || llvm::all_of(getDimLevelType(), isDenseDLT);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool SparseTensorEncodingAttr::isAllOrdered() const {
|
|
|
|
|
return !getImpl() || llvm::all_of(getDimLevelType(), isOrderedDLT);
|
2022-12-16 18:12:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool SparseTensorEncodingAttr::hasIdDimOrdering() const {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return !getImpl() || !getDimOrdering() || getDimOrdering().isIdentity();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the level-rank, i.e. the number of entries in the level-type
/// array.  Asserts that the attribute has an implementation.
Level SparseTensorEncodingAttr::getLvlRank() const {
  assert(getImpl() && "Uninitialized SparseTensorEncodingAttr");
  const auto lvlTypes = getDimLevelType();
  return lvlTypes.size();
}
|
|
|
|
|
|
|
|
|
|
/// Returns the level type of level `l`.  An attribute without an
/// implementation reports `DimLevelType::Dense` for every level; otherwise
/// `l` is asserted to be below the level-rank.
DimLevelType SparseTensorEncodingAttr::getLvlType(Level l) const {
  if (getImpl()) {
    assert(l < getLvlRank() && "Level is out of bounds");
    return getDimLevelType()[l];
  }
  return DimLevelType::Dense;
}
|
|
|
|
|
|
2022-12-27 22:10:37 +00:00
|
|
|
std::optional<uint64_t>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SparseTensorEncodingAttr::getStaticDimSliceOffset(Dimension dim) const {
|
2022-12-27 22:10:37 +00:00
|
|
|
return getDimSlices()[dim].getStaticOffset();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::optional<uint64_t>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SparseTensorEncodingAttr::getStaticDimSliceSize(Dimension dim) const {
|
2022-12-27 22:10:37 +00:00
|
|
|
return getDimSlices()[dim].getStaticSize();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::optional<uint64_t>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SparseTensorEncodingAttr::getStaticDimSliceStride(Dimension dim) const {
|
2022-12-27 22:10:37 +00:00
|
|
|
return getDimSlices()[dim].getStaticStride();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::optional<uint64_t>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SparseTensorEncodingAttr::getStaticLvlSliceOffset(Level lvl) const {
|
|
|
|
|
// FIXME: `toOrigDim` is deprecated.
|
2022-12-27 22:10:37 +00:00
|
|
|
return getStaticDimSliceOffset(toOrigDim(*this, lvl));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::optional<uint64_t>
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SparseTensorEncodingAttr::getStaticLvlSliceSize(Level lvl) const {
|
|
|
|
|
// FIXME: `toOrigDim` is deprecated.
|
2022-12-27 22:10:37 +00:00
|
|
|
return getStaticDimSliceSize(toOrigDim(*this, lvl));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns the static slice stride for level `lvl`: the level is first
/// translated with `toOrigDim`, then looked up with
/// `getStaticDimSliceStride`.
std::optional<uint64_t>
SparseTensorEncodingAttr::getStaticLvlSliceStride(Level lvl) const {
  // FIXME: `toOrigDim` is deprecated.
  const Dimension dim = toOrigDim(*this, lvl);
  return getStaticDimSliceStride(dim);
}
|
|
|
|
|
|
2023-01-18 18:22:48 -08:00
|
|
|
/// Table of the `DimLevelType` values that `parseDLT` will accept; a string
/// is recognized only if it matches the `toMLIRString` spelling of one of
/// these entries.
static const DimLevelType validDLTs[] = {
    DimLevelType::Dense,
    DimLevelType::Compressed,
    DimLevelType::CompressedNu,
    DimLevelType::CompressedNo,
    DimLevelType::CompressedNuNo,
    DimLevelType::Singleton,
    DimLevelType::SingletonNu,
    DimLevelType::SingletonNo,
    DimLevelType::SingletonNuNo,
};
|
|
|
|
|
|
|
|
|
|
/// Looks up the `DimLevelType` whose `toMLIRString` spelling equals `str`.
/// Entries of `validDLTs` are tried in order and the first match wins;
/// `std::nullopt` is returned when none of them matches.
static std::optional<DimLevelType> parseDLT(StringRef str) {
  std::optional<DimLevelType> found = std::nullopt;
  for (const DimLevelType candidate : validDLTs) {
    if (str == toMLIRString(candidate)) {
      found = candidate;
      break;
    }
  }
  return found;
}
|
|
|
|
|
|
2021-11-11 06:12:06 +00:00
|
|
|
Attribute SparseTensorEncodingAttr::parse(AsmParser &parser, Type type) {
|
2022-12-27 22:10:37 +00:00
|
|
|
#define RETURN_ON_FAIL(stmt) \
|
|
|
|
|
if (failed(stmt)) { \
|
|
|
|
|
return {}; \
|
|
|
|
|
}
|
2023-01-18 18:22:48 -08:00
|
|
|
#define ERROR_IF(COND, MSG) \
|
|
|
|
|
if (COND) { \
|
|
|
|
|
parser.emitError(parser.getNameLoc(), MSG); \
|
|
|
|
|
return {}; \
|
|
|
|
|
}
|
2022-12-27 22:10:37 +00:00
|
|
|
|
|
|
|
|
RETURN_ON_FAIL(parser.parseLess())
|
|
|
|
|
RETURN_ON_FAIL(parser.parseLBrace())
|
|
|
|
|
|
2021-04-30 18:07:28 -07:00
|
|
|
// Process the data from the parsed dictionary value into struct-like data.
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
SmallVector<DimLevelType> lvlTypes;
|
2022-12-27 22:10:37 +00:00
|
|
|
SmallVector<SparseTensorDimSliceAttr> slices;
|
2022-08-17 16:59:08 -07:00
|
|
|
AffineMap dimOrd = {};
|
2022-10-04 14:34:37 -07:00
|
|
|
AffineMap higherOrd = {};
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
unsigned posWidth = 0;
|
|
|
|
|
unsigned crdWidth = 0;
|
2022-12-27 22:10:37 +00:00
|
|
|
|
|
|
|
|
StringRef attrName;
|
|
|
|
|
// Exactly 6 keys.
|
|
|
|
|
SmallVector<StringRef, 6> keys = {"dimLevelType", "dimOrdering",
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
"higherOrdering", "posWidth",
|
|
|
|
|
"crdWidth", "slice"};
|
2022-12-27 22:10:37 +00:00
|
|
|
while (succeeded(parser.parseOptionalKeyword(&attrName))) {
|
|
|
|
|
if (!llvm::is_contained(keys, attrName)) {
|
|
|
|
|
parser.emitError(parser.getNameLoc(), "unexpected key: ") << attrName;
|
|
|
|
|
return {};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Consume the `=` after keys
|
|
|
|
|
RETURN_ON_FAIL(parser.parseEqual())
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// FIXME: using `operator==` below duplicates the string comparison
|
|
|
|
|
// cost of the `is_contained` check above. Should instead use some
|
|
|
|
|
// "find" function that returns the index into `keys` so that we can
|
|
|
|
|
// dispatch on that instead.
|
2022-12-27 22:10:37 +00:00
|
|
|
if (attrName == "dimLevelType") {
|
|
|
|
|
Attribute attr;
|
|
|
|
|
RETURN_ON_FAIL(parser.parseAttribute(attr));
|
|
|
|
|
auto arrayAttr = attr.dyn_cast<ArrayAttr>();
|
2023-01-18 18:22:48 -08:00
|
|
|
ERROR_IF(!arrayAttr, "expected an array for dimension level types")
|
2021-12-22 00:19:53 +00:00
|
|
|
for (auto i : arrayAttr) {
|
|
|
|
|
auto strAttr = i.dyn_cast<StringAttr>();
|
2023-01-18 18:22:48 -08:00
|
|
|
ERROR_IF(!strAttr, "expected a string value in dimension level types")
|
2021-04-30 18:07:28 -07:00
|
|
|
auto strVal = strAttr.getValue();
|
2023-01-18 18:22:48 -08:00
|
|
|
if (auto optDLT = parseDLT(strVal)) {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
lvlTypes.push_back(optDLT.value());
|
2021-04-30 18:07:28 -07:00
|
|
|
} else {
|
|
|
|
|
parser.emitError(parser.getNameLoc(),
|
|
|
|
|
"unexpected dimension level type: ")
|
|
|
|
|
<< strVal;
|
|
|
|
|
return {};
|
|
|
|
|
}
|
|
|
|
|
}
|
2022-12-27 22:10:37 +00:00
|
|
|
} else if (attrName == "dimOrdering") {
|
|
|
|
|
Attribute attr;
|
|
|
|
|
RETURN_ON_FAIL(parser.parseAttribute(attr))
|
|
|
|
|
auto affineAttr = attr.dyn_cast<AffineMapAttr>();
|
2023-01-18 18:22:48 -08:00
|
|
|
ERROR_IF(!affineAttr, "expected an affine map for dimension ordering")
|
2022-08-17 16:59:08 -07:00
|
|
|
dimOrd = affineAttr.getValue();
|
2022-12-27 22:10:37 +00:00
|
|
|
} else if (attrName == "higherOrdering") {
|
|
|
|
|
Attribute attr;
|
|
|
|
|
RETURN_ON_FAIL(parser.parseAttribute(attr))
|
|
|
|
|
auto affineAttr = attr.dyn_cast<AffineMapAttr>();
|
2023-01-18 18:22:48 -08:00
|
|
|
ERROR_IF(!affineAttr, "expected an affine map for higher ordering")
|
2022-10-04 14:34:37 -07:00
|
|
|
higherOrd = affineAttr.getValue();
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
} else if (attrName == "posWidth") {
|
2022-12-27 22:10:37 +00:00
|
|
|
Attribute attr;
|
|
|
|
|
RETURN_ON_FAIL(parser.parseAttribute(attr))
|
|
|
|
|
auto intAttr = attr.dyn_cast<IntegerAttr>();
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
ERROR_IF(!intAttr, "expected an integral position bitwidth")
|
|
|
|
|
posWidth = intAttr.getInt();
|
|
|
|
|
} else if (attrName == "crdWidth") {
|
2022-12-27 22:10:37 +00:00
|
|
|
Attribute attr;
|
|
|
|
|
RETURN_ON_FAIL(parser.parseAttribute(attr))
|
|
|
|
|
auto intAttr = attr.dyn_cast<IntegerAttr>();
|
2023-01-18 18:22:48 -08:00
|
|
|
ERROR_IF(!intAttr, "expected an integral index bitwidth")
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency's sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
crdWidth = intAttr.getInt();
|
2022-12-27 22:10:37 +00:00
|
|
|
} else if (attrName == "slice") {
|
|
|
|
|
RETURN_ON_FAIL(parser.parseLSquare())
|
|
|
|
|
// Dispatches to DimSliceAttr to skip mnemonic
|
|
|
|
|
bool finished = false;
|
|
|
|
|
while (auto attr = SparseTensorDimSliceAttr::parse(parser, nullptr)) {
|
|
|
|
|
auto sliceAttr = attr.cast<SparseTensorDimSliceAttr>();
|
|
|
|
|
slices.push_back(sliceAttr);
|
|
|
|
|
if (parser.parseOptionalComma().failed()) {
|
|
|
|
|
finished = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// Wrong when parsing slices
|
|
|
|
|
if (!finished)
|
|
|
|
|
return {};
|
|
|
|
|
RETURN_ON_FAIL(parser.parseRSquare())
|
2021-04-30 18:07:28 -07:00
|
|
|
}
|
2022-12-27 22:10:37 +00:00
|
|
|
|
|
|
|
|
// Only the last item can omit the comma
|
|
|
|
|
if (parser.parseOptionalComma().failed())
|
|
|
|
|
break;
|
2021-04-30 18:07:28 -07:00
|
|
|
}
|
2022-12-27 22:10:37 +00:00
|
|
|
|
|
|
|
|
RETURN_ON_FAIL(parser.parseRBrace())
|
|
|
|
|
RETURN_ON_FAIL(parser.parseGreater())
|
2023-01-18 18:22:48 -08:00
|
|
|
#undef ERROR_IF
|
2022-12-27 22:10:37 +00:00
|
|
|
#undef RETURN_ON_FAIL
|
|
|
|
|
|
2021-04-30 18:07:28 -07:00
|
|
|
// Construct struct-like storage for attribute.
|
2022-10-04 14:34:37 -07:00
|
|
|
return parser.getChecked<SparseTensorEncodingAttr>(
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency's sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
parser.getContext(), lvlTypes, dimOrd, higherOrd, posWidth, crdWidth,
|
|
|
|
|
slices);
|
2021-04-30 18:07:28 -07:00
|
|
|
}
|
|
|
|
|
|
2021-11-11 06:12:06 +00:00
|
|
|
void SparseTensorEncodingAttr::print(AsmPrinter &printer) const {
|
2021-04-30 18:07:28 -07:00
|
|
|
// Print the struct-like storage in dictionary fashion.
|
2021-11-10 00:38:01 +00:00
|
|
|
printer << "<{ dimLevelType = [ ";
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
llvm::interleaveComma(getDimLevelType(), printer, [&](DimLevelType dlt) {
|
|
|
|
|
printer << "\"" << toMLIRString(dlt) << "\"";
|
|
|
|
|
});
|
2021-04-30 18:07:28 -07:00
|
|
|
printer << " ]";
|
2022-08-17 16:59:08 -07:00
|
|
|
// Print remaining members only for non-default values.
|
2022-12-16 18:12:05 +00:00
|
|
|
if (!hasIdDimOrdering())
|
2021-04-30 18:07:28 -07:00
|
|
|
printer << ", dimOrdering = affine_map<" << getDimOrdering() << ">";
|
2022-10-04 14:34:37 -07:00
|
|
|
if (getHigherOrdering())
|
|
|
|
|
printer << ", higherOrdering = affine_map<" << getHigherOrdering() << ">";
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (getPosWidth())
|
|
|
|
|
printer << ", posWidth = " << getPosWidth();
|
|
|
|
|
if (getCrdWidth())
|
|
|
|
|
printer << ", crdWidth = " << getCrdWidth();
|
2022-12-27 22:10:37 +00:00
|
|
|
if (!getDimSlices().empty()) {
|
|
|
|
|
printer << ", slice = [ ";
|
|
|
|
|
llvm::interleaveComma(getDimSlices(), printer,
|
|
|
|
|
[&](SparseTensorDimSliceAttr attr) {
|
|
|
|
|
// Calls SparseTensorDimSliceAttr::print directly to
|
|
|
|
|
// skip mnemonic.
|
|
|
|
|
attr.print(printer);
|
|
|
|
|
});
|
|
|
|
|
printer << " ]";
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-17 16:59:08 -07:00
|
|
|
printer << " }>";
|
2021-04-30 18:07:28 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LogicalResult SparseTensorEncodingAttr::verify(
|
|
|
|
|
function_ref<InFlightDiagnostic()> emitError,
|
|
|
|
|
ArrayRef<DimLevelType> dimLevelType, AffineMap dimOrdering,
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
AffineMap higherOrdering, unsigned posWidth, unsigned crdWidth,
|
2022-12-27 22:10:37 +00:00
|
|
|
ArrayRef<SparseTensorDimSliceAttr> dimSlices) {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (!acceptBitWidth(posWidth))
|
|
|
|
|
return emitError() << "unexpected position bitwidth: " << posWidth;
|
|
|
|
|
if (!acceptBitWidth(crdWidth))
|
|
|
|
|
return emitError() << "unexpected coordinate bitwidth: " << crdWidth;
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// Before we can check that the level-rank is consistent/coherent
|
|
|
|
|
// across all fields, we need to define it. The source-of-truth for
|
|
|
|
|
// the `getLvlRank` method is the length of the level-types array,
|
|
|
|
|
// since it must always be provided and have full rank; therefore we
|
|
|
|
|
// use that same source-of-truth here.
|
|
|
|
|
const Level lvlRank = dimLevelType.size();
|
|
|
|
|
if (lvlRank == 0)
|
|
|
|
|
return emitError() << "expected a non-empty array for level types";
|
2021-04-30 18:07:28 -07:00
|
|
|
if (dimOrdering) {
|
|
|
|
|
if (!dimOrdering.isPermutation())
|
|
|
|
|
return emitError()
|
|
|
|
|
<< "expected a permutation affine map for dimension ordering";
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (dimOrdering.getNumResults() != lvlRank)
|
2021-04-30 18:07:28 -07:00
|
|
|
return emitError() << "unexpected mismatch in ordering and dimension "
|
|
|
|
|
"level types size";
|
|
|
|
|
}
|
2022-10-04 14:34:37 -07:00
|
|
|
if (higherOrdering) {
|
|
|
|
|
if (higherOrdering.getNumDims() >= higherOrdering.getNumResults())
|
|
|
|
|
return emitError() << "unexpected higher ordering mapping from "
|
|
|
|
|
<< higherOrdering.getNumDims() << " to "
|
|
|
|
|
<< higherOrdering.getNumResults();
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (higherOrdering.getNumResults() != lvlRank)
|
2022-10-04 14:34:37 -07:00
|
|
|
return emitError() << "unexpected mismatch in higher ordering and "
|
|
|
|
|
"dimension level types size";
|
|
|
|
|
}
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (!dimSlices.empty() && dimSlices.size() != lvlRank) {
|
2022-12-27 22:10:37 +00:00
|
|
|
return emitError() << "unexpected mismatch in dimension slices and "
|
|
|
|
|
"dimension level type size";
|
|
|
|
|
}
|
2021-04-30 18:07:28 -07:00
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-18 18:22:48 -08:00
|
|
|
// Evaluates `X` and, if `failed(X)` holds, returns `failure()` from the
// enclosing function. Note that this expands to a bare `if` statement (there
// is no do/while wrapper), so it already forms a complete statement.
#define RETURN_FAILURE_IF_FAILED(X)                                            \
  if (failed(X))                                                               \
    return failure();
|
|
|
|
|
|
2021-04-30 18:07:28 -07:00
|
|
|
LogicalResult SparseTensorEncodingAttr::verifyEncoding(
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
ArrayRef<DynSize> dimShape, Type elementType,
|
2021-04-30 18:07:28 -07:00
|
|
|
function_ref<InFlightDiagnostic()> emitError) const {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// Check structural integrity. In particular, this ensures that the
|
|
|
|
|
// level-rank is coherent across all the fields.
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
RETURN_FAILURE_IF_FAILED(verify(emitError, getDimLevelType(),
|
|
|
|
|
getDimOrdering(), getHigherOrdering(),
|
|
|
|
|
getPosWidth(), getCrdWidth(), getDimSlices()))
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// Check integrity with tensor type specifics. In particular, we
|
|
|
|
|
// need only check that the dimension-rank of the tensor agrees with
|
|
|
|
|
// the dimension-rank of the encoding.
|
|
|
|
|
const Dimension dimRank = dimShape.size();
|
|
|
|
|
if (dimRank == 0)
|
2021-11-03 17:04:42 -07:00
|
|
|
return emitError() << "expected non-scalar sparse tensor";
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (const auto higherOrdering = getHigherOrdering()) {
|
|
|
|
|
if (higherOrdering.getNumDims() != dimRank)
|
|
|
|
|
return emitError() << "expected an affine map with " << dimRank
|
|
|
|
|
<< " dimensions for higher ordering";
|
2022-10-04 14:34:37 -07:00
|
|
|
// TODO: verification of higher ordering contents
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
} else if (dimRank != getLvlRank()) {
|
|
|
|
|
return emitError() << "expected an array of size " << dimRank
|
2021-04-30 18:07:28 -07:00
|
|
|
<< " for dimension level types";
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
}
|
2021-04-30 18:07:28 -07:00
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-27 17:06:20 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// Convenience Methods.
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2021-05-10 10:34:21 -07:00
|
|
|
/// Looks up the sparse tensor encoding attached to `type`.
///
/// Two kinds of types are recognized:
///   * `RankedTensorType`: its encoding attribute is returned if it is a
///     `SparseTensorEncodingAttr`; a missing encoding, or an encoding of
///     some other kind, yields a null attribute.
///   * `StorageSpecifierType`: the result of its `getEncoding()` is returned.
///
/// Any other type yields a null attribute.
SparseTensorEncodingAttr
mlir::sparse_tensor::getSparseTensorEncoding(Type type) {
  if (const auto rankedTp = type.dyn_cast<RankedTensorType>()) {
    // The tensor's encoding may be null, hence the `_or_null` cast.
    const auto encoding = rankedTp.getEncoding();
    return encoding.dyn_cast_or_null<SparseTensorEncodingAttr>();
  }
  if (const auto specifierTp = type.dyn_cast<StorageSpecifierType>())
    return specifierTp.getEncoding();
  return nullptr;
}
|
|
|
|
|
|
2023-01-03 15:16:12 -08:00
|
|
|
/// Returns true iff the given sparse tensor encoding attribute has a trailing
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
/// COO region starting at the given level.
|
|
|
|
|
static bool isCOOType(SparseTensorEncodingAttr enc, Level startLvl,
|
|
|
|
|
bool isUnique) {
|
|
|
|
|
if (!enc || !enc.isCompressedLvl(startLvl))
|
2023-01-03 15:16:12 -08:00
|
|
|
return false;
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const Level lvlRank = enc.getLvlRank();
|
|
|
|
|
for (Level l = startLvl + 1; l < lvlRank; ++l)
|
|
|
|
|
if (!enc.isSingletonLvl(l))
|
2023-01-03 15:16:12 -08:00
|
|
|
return false;
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// If isUnique is true, then make sure that the last level is unique,
|
|
|
|
|
// that is, lvlRank == 1 (unique the only compressed) and lvlRank > 1
|
2023-01-03 15:16:12 -08:00
|
|
|
// (unique on the last singleton).
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return !isUnique || enc.isUniqueLvl(lvlRank - 1);
|
2023-01-03 15:16:12 -08:00
|
|
|
}
|
|
|
|
|
|
2023-02-27 22:40:34 +00:00
|
|
|
/// Returns true iff the sparse tensor encoding of `tp` (as obtained from
/// `getSparseTensorEncoding`) is a COO region starting at level 0 whose
/// last level is unique.
bool mlir::sparse_tensor::isUniqueCOOType(Type tp) {
  const SparseTensorEncodingAttr enc = getSparseTensorEncoding(tp);
  return isCOOType(enc, /*startLvl=*/0, /*isUnique=*/true);
}
|
2022-10-30 21:55:25 -07:00
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
/// Returns the first level at which a trailing COO region of `enc` begins,
/// or the level-rank of `enc` when there is no such region.
///
/// Only COO regions spanning at least two levels are considered (for the
/// purpose of the AOS storage optimization), so the last level is never
/// tried as a starting point.
Level mlir::sparse_tensor::getCOOStart(SparseTensorEncodingAttr enc) {
  const Level lvlRank = enc.getLvlRank();
  // The bound `lvl + 1 < lvlRank` stops before the last level and makes the
  // loop a no-op whenever the encoding has fewer than two levels.
  for (Level lvl = 0; lvl + 1 < lvlRank; ++lvl) {
    if (isCOOType(enc, lvl, /*isUnique=*/false))
      return lvl;
  }
  return lvlRank;
}
|
|
|
|
|
|
2023-02-02 23:34:28 +00:00
|
|
|
// Helpers to setup a COO type.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
RankedTensorType sparse_tensor::getCOOFromTypeWithOrdering(RankedTensorType rtt,
|
|
|
|
|
AffineMap lvlPerm,
|
2023-02-02 23:34:28 +00:00
|
|
|
bool ordered) {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const SparseTensorType src(rtt);
|
|
|
|
|
// The dim-rank of the source `RankedTensorType` is used as the lvl-rank
|
|
|
|
|
// of the result `RankedTensorType`. This follows from the fact that the
|
|
|
|
|
// result's encoding has the default higher-ordering (hence the result's
|
|
|
|
|
// lvl-rank equals its dim-rank). We don't need to assert that `lvlRank`
|
|
|
|
|
// agrees with the size of `lvlPerm` because that will be verified by
|
|
|
|
|
// `STEA::get`.
|
|
|
|
|
const Level lvlRank = src.getDimRank();
|
|
|
|
|
SmallVector<DimLevelType> lvlTypes;
|
|
|
|
|
|
|
|
|
|
// An unordered and non-unique compressed level at beginning.
|
|
|
|
|
// If this is also the last level, then it is unique.
|
|
|
|
|
lvlTypes.push_back(
|
|
|
|
|
*getDimLevelType(LevelFormat::Compressed, ordered, lvlRank == 1));
|
|
|
|
|
if (lvlRank > 1) {
|
2023-02-02 23:34:28 +00:00
|
|
|
// TODO: it is actually ordered at the level for ordered input.
|
|
|
|
|
// Followed by unordered non-unique n-2 singleton levels.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
std::fill_n(std::back_inserter(lvlTypes), lvlRank - 2,
|
2023-02-02 23:34:28 +00:00
|
|
|
*getDimLevelType(LevelFormat::Singleton, ordered, false));
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// Ends by a unique singleton level unless the lvlRank is 1.
|
|
|
|
|
lvlTypes.push_back(*getDimLevelType(LevelFormat::Singleton, ordered, true));
|
2023-02-02 23:34:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// TODO: Maybe pick the bitwidth based on input/output tensors (probably the
|
|
|
|
|
// largest one among them) in the original operation instead of using the
|
|
|
|
|
// default value.
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
unsigned posWidth = src.getPosWidth();
|
|
|
|
|
unsigned crdWidth = src.getCrdWidth();
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
auto enc = SparseTensorEncodingAttr::get(src.getContext(), lvlTypes, lvlPerm,
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
AffineMap(), posWidth, crdWidth);
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return RankedTensorType::get(src.getDimShape(), src.getElementType(), enc);
|
2023-02-02 23:34:28 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Convenience wrapper around `getCOOFromTypeWithOrdering` that supplies the
/// multi-dimensional identity map (of rank `src.getRank()`) as the map
/// argument; `ordered` is forwarded unchanged.
RankedTensorType sparse_tensor::getCOOFromType(RankedTensorType src,
                                               bool ordered) {
  const auto identityMap =
      AffineMap::getMultiDimIdentityMap(src.getRank(), src.getContext());
  return getCOOFromTypeWithOrdering(src, identityMap, ordered);
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// TODO: Remove this definition once all use-sites have been fixed to
|
|
|
|
|
// properly handle non-permutations.
|
|
|
|
|
Dimension mlir::sparse_tensor::toOrigDim(SparseTensorEncodingAttr enc,
|
|
|
|
|
Level l) {
|
2022-09-28 15:04:17 -07:00
|
|
|
if (enc) {
|
|
|
|
|
auto order = enc.getDimOrdering();
|
|
|
|
|
if (order) {
|
|
|
|
|
assert(order.isPermutation());
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return order.getDimPosition(l);
|
2022-09-28 15:04:17 -07:00
|
|
|
}
|
|
|
|
|
}
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
return l;
|
2022-09-28 15:04:17 -07:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// TODO: Remove this definition once all use-sites have been fixed to
|
|
|
|
|
// properly handle non-permutations.
|
|
|
|
|
Level mlir::sparse_tensor::toStoredDim(SparseTensorEncodingAttr enc,
|
|
|
|
|
Dimension d) {
|
2022-09-28 15:04:17 -07:00
|
|
|
if (enc) {
|
|
|
|
|
auto order = enc.getDimOrdering();
|
|
|
|
|
if (order) {
|
|
|
|
|
assert(order.isPermutation());
|
2022-12-01 18:43:18 +00:00
|
|
|
auto maybePos =
|
|
|
|
|
order.getResultPosition(getAffineDimExpr(d, enc.getContext()));
|
|
|
|
|
assert(maybePos.has_value());
|
|
|
|
|
return *maybePos;
|
2022-09-28 15:04:17 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return d;
|
|
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// TODO: Remove this definition once all use-sites have been fixed to
|
|
|
|
|
// properly handle non-permutations.
|
|
|
|
|
Dimension mlir::sparse_tensor::toOrigDim(RankedTensorType type, Level l) {
|
|
|
|
|
const auto enc = getSparseTensorEncoding(type);
|
|
|
|
|
assert(l < enc.getLvlRank());
|
|
|
|
|
return toOrigDim(enc, l);
|
2022-09-28 15:04:17 -07:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// TODO: Remove this definition once all use-sites have been fixed to
|
|
|
|
|
// properly handle non-permutations.
|
|
|
|
|
Level mlir::sparse_tensor::toStoredDim(RankedTensorType type, Dimension d) {
|
|
|
|
|
assert(d < static_cast<Dimension>(type.getRank()));
|
2022-09-28 15:04:17 -07:00
|
|
|
return toStoredDim(getSparseTensorEncoding(type), d);
|
|
|
|
|
}
|
|
|
|
|
|
2021-05-10 10:34:21 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
2022-12-13 19:49:43 +00:00
|
|
|
// SparseTensorDialect Types.
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2022-12-15 18:28:30 +00:00
|
|
|
/// We normalized sparse tensor encoding attribute by always using
|
|
|
|
|
/// ordered/unique DLT such that "compressed-nu-no" and "compressed-nu" (as well
|
|
|
|
|
/// as other variants) lead to the same storage specifier type, and stripping
|
2023-01-10 22:35:49 +00:00
|
|
|
/// irrelevant fields that do not alter the sparse tensor memory layout.
|
2022-12-15 18:28:30 +00:00
|
|
|
static SparseTensorEncodingAttr
|
|
|
|
|
getNormalizedEncodingForSpecifier(SparseTensorEncodingAttr enc) {
|
|
|
|
|
SmallVector<DimLevelType> dlts;
|
|
|
|
|
for (auto dlt : enc.getDimLevelType())
|
|
|
|
|
dlts.push_back(*getDimLevelType(*getLevelFormat(dlt), true, true));
|
|
|
|
|
|
|
|
|
|
return SparseTensorEncodingAttr::get(
|
|
|
|
|
enc.getContext(), dlts,
|
|
|
|
|
AffineMap(), // dimOrdering (irrelavant to storage speicifer)
|
|
|
|
|
AffineMap(), // highLvlOrdering (irrelavant to storage specifer)
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Always use `index` for memSize and lvlSize instead of reusing
|
2023-01-10 22:35:49 +00:00
|
|
|
// `getPosWidth` and `getCrdWidth`. It allows us to reuse the same SSA
|
|
|
|
|
// value for different bitwidth, it also avoids casting between index and
|
|
|
|
|
// integer (returned by DimOp)
|
|
|
|
|
0, 0, enc.getDimSlices());
|
2022-12-15 18:28:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Creates a storage specifier type in `ctx` from `encoding`, after passing
/// the encoding through `getNormalizedEncodingForSpecifier`.
StorageSpecifierType
StorageSpecifierType::get(MLIRContext *ctx, SparseTensorEncodingAttr encoding) {
  auto normalizedEnc = getNormalizedEncodingForSpecifier(encoding);
  return Base::get(ctx, normalizedEnc);
}
|
|
|
|
|
|
2022-12-13 19:49:43 +00:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// SparseTensorDialect Operations.
|
2021-05-10 10:34:21 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
/// Succeeds iff `lvl` is strictly less than the level-rank of the sparse
/// tensor type of `tensor`.
static LogicalResult lvlIsInBounds(Level lvl, Value tensor) {
  const auto lvlRank = getSparseTensorType(tensor).getLvlRank();
  const bool inBounds = lvl < lvlRank;
  return success(inBounds);
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
/// Succeeds iff the element type of the memref `mem` matches `width`: a
/// `width` of zero requires the `index` type, any other value requires an
/// integer type of exactly that bitwidth.
static LogicalResult isMatchingWidth(Value mem, unsigned width) {
  const Type elemTp = getMemRefType(mem).getElementType();
  // Zero is the marker for `index` rather than a fixed-width integer.
  if (width == 0)
    return success(elemTp.isIndex());
  return success(elemTp.isInteger(width));
}
|
|
|
|
|
|
2022-12-14 11:39:19 +01:00
|
|
|
static LogicalResult verifySparsifierGetterSetter(
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
StorageSpecifierKind mdKind, std::optional<Level> lvl,
|
2022-12-14 11:39:19 +01:00
|
|
|
TypedValue<StorageSpecifierType> md, Operation *op) {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (mdKind == StorageSpecifierKind::ValMemSize && lvl) {
|
2022-12-13 19:49:43 +00:00
|
|
|
return op->emitError(
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
"redundant level argument for querying value memory size");
|
2022-12-13 19:49:43 +00:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto enc = md.getType().getEncoding();
|
|
|
|
|
const Level lvlRank = enc.getLvlRank();
|
2022-12-13 19:49:43 +00:00
|
|
|
|
2023-01-10 22:35:49 +00:00
|
|
|
if (mdKind == StorageSpecifierKind::DimOffset ||
|
|
|
|
|
mdKind == StorageSpecifierKind::DimStride)
|
|
|
|
|
if (!enc.isSlice())
|
|
|
|
|
return op->emitError("requested slice data on non-slice tensor");
|
2023-01-10 23:46:45 +00:00
|
|
|
|
2022-12-13 19:49:43 +00:00
|
|
|
if (mdKind != StorageSpecifierKind::ValMemSize) {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (!lvl)
|
|
|
|
|
return op->emitError("missing level argument");
|
2022-12-13 19:49:43 +00:00
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const Level l = lvl.value();
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (l >= lvlRank)
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
return op->emitError("requested level is out of bounds");
|
2022-12-13 19:49:43 +00:00
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (mdKind == StorageSpecifierKind::PosMemSize && enc.isSingletonLvl(l))
|
2022-12-13 19:49:43 +00:00
|
|
|
return op->emitError(
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
"requested position memory size on a singleton level");
|
2022-12-13 19:49:43 +00:00
|
|
|
}
|
|
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
static LogicalResult verifyPackUnPack(Operation *op, bool requiresStaticShape,
|
|
|
|
|
SparseTensorType tensorTp,
|
|
|
|
|
RankedTensorType valuesTp,
|
|
|
|
|
RankedTensorType coordinatesTp) {
|
|
|
|
|
if (requiresStaticShape && !tensorTp.hasStaticDimShape())
|
|
|
|
|
return op->emitError("the sparse-tensor must have static shape");
|
|
|
|
|
if (!tensorTp.hasEncoding())
|
|
|
|
|
return op->emitError("the sparse-tensor must have an encoding attribute");
|
|
|
|
|
if (!tensorTp.isIdentity())
|
|
|
|
|
return op->emitError("the sparse-tensor must have the identity mapping");
|
|
|
|
|
if (!isUniqueCOOType(tensorTp))
|
|
|
|
|
return op->emitError("the sparse-tensor must have a COO type");
|
|
|
|
|
|
|
|
|
|
if (coordinatesTp.getRank() != 2)
|
|
|
|
|
return op->emitError("coordinates must have rank 2");
|
|
|
|
|
if (requiresStaticShape && !coordinatesTp.hasStaticShape())
|
|
|
|
|
return op->emitError("coordinates must have static shape");
|
|
|
|
|
if (coordinatesTp.getElementType() != tensorTp.getCrdType())
|
|
|
|
|
return op->emitError("input/output coordinate-types don't match");
|
|
|
|
|
|
|
|
|
|
if (valuesTp.getRank() != 1)
|
|
|
|
|
return op->emitError("values must have rank 1");
|
|
|
|
|
if (requiresStaticShape && !valuesTp.hasStaticShape())
|
|
|
|
|
return op->emitError("values must have static shape");
|
|
|
|
|
if (valuesTp.getElementType() != tensorTp.getElementType())
|
|
|
|
|
return op->emitError("input/output element-types don't match");
|
|
|
|
|
|
|
|
|
|
const auto valuesNSE = valuesTp.getShape()[0];
|
|
|
|
|
const auto coordsNSE = coordinatesTp.getShape()[0];
|
|
|
|
|
if (!ShapedType::isDynamic(valuesNSE) && !ShapedType::isDynamic(coordsNSE) &&
|
|
|
|
|
valuesNSE != coordsNSE)
|
|
|
|
|
return op->emitError("values/coordinates number-of-elements don't match");
|
|
|
|
|
|
|
|
|
|
// NOTE: We use `getLvlRank` because the `coordinatesTp` is for
|
|
|
|
|
// level-coordinates (cf., the op documentation).
|
2023-03-06 13:46:12 -08:00
|
|
|
const DynSize coordsRank = coordinatesTp.getShape()[1];
|
|
|
|
|
const Level tensorRank = tensorTp.getLvlRank();
|
|
|
|
|
// FIXME: replace the `operator!=` with our backported `safelyNE`.
|
|
|
|
|
if (!ShapedType::isDynamic(coordsRank) &&
|
|
|
|
|
coordsRank != static_cast<DynSize>(tensorRank))
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
return op->emitError("input/output level-ranks don't match");
|
2023-02-02 23:34:28 +00:00
|
|
|
|
|
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2023-02-09 19:08:36 +00:00
|
|
|
LogicalResult PackOp::verify() {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const auto valuesTp = getRankedTensorType(getValues());
|
|
|
|
|
const auto coordinatesTp = getRankedTensorType(getCoordinates());
|
|
|
|
|
const auto resTp = getSparseTensorType(getResult());
|
|
|
|
|
return verifyPackUnPack(*this, true, resTp, valuesTp, coordinatesTp);
|
2023-02-09 19:08:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LogicalResult UnpackOp::verify() {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const auto valuesTp = getRankedTensorType(getValues());
|
|
|
|
|
const auto coordinatesTp = getRankedTensorType(getCoordinates());
|
|
|
|
|
const auto srcTp = getSparseTensorType(getTensor());
|
|
|
|
|
return verifyPackUnPack(*this, false, srcTp, valuesTp, coordinatesTp);
|
2023-02-09 19:08:36 +00:00
|
|
|
}
|
|
|
|
|
|
2022-02-02 10:18:06 -08:00
|
|
|
/// Verifies that the source and destination of a `ConvertOp` are both ranked
/// tensors with the same rank and compatible per-dimension sizes.
LogicalResult ConvertOp::verify() {
  const auto srcTp = getSource().getType().dyn_cast<RankedTensorType>();
  const auto dstTp = getDest().getType().dyn_cast<RankedTensorType>();
  if (!srcTp || !dstTp)
    return emitError("unexpected type in convert");
  if (srcTp.getRank() != dstTp.getRank())
    return emitError("unexpected conversion mismatch in rank");

  const auto srcShape = srcTp.getShape();
  const auto dstShape = dstTp.getShape();
  const Dimension dimRank = srcTp.getRank();
  // A dimension is acceptable when both sizes agree (10 vs. 10, ? vs. ?) or
  // when the destination is dynamic (10 vs. ?). Everything else is rejected:
  // either it is a direct mismatch (10 vs. 20), or it would need a runtime
  // assert to be sound (? vs. 10).
  for (Dimension d = 0; d < dimRank; ++d) {
    const bool dstIsDynamic = dstShape[d] == ShapedType::kDynamic;
    if (!dstIsDynamic && srcShape[d] != dstShape[d])
      return emitError("unexpected conversion mismatch in dimension ") << d;
  }
  return success();
}
|
|
|
|
|
|
2023-01-10 21:27:18 +01:00
|
|
|
/// Folds a `ConvertOp` to its source only when the conversion is a trivial
/// dense-to-dense one (identical types, no sparse encoding on the result).
OpFoldResult ConvertOp::fold(FoldAdaptor adaptor) {
  const Type dstTp = getType();
  // Trivial sparse-to-sparse converts are deliberately left in place for
  // codegen to remove: they are how we tell bufferization that the sparse
  // codegen will expand the tensor buffer into sparse tensor storage.
  if (getSparseTensorEncoding(dstTp))
    return {};
  // A dense convert that actually changes the type cannot be folded away.
  if (dstTp != getSource().getType())
    return {};
  return getSource();
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
LogicalResult ToPositionsOp::verify() {
|
2022-06-28 13:11:25 -07:00
|
|
|
auto e = getSparseTensorEncoding(getTensor().getType());
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (failed(lvlIsInBounds(getLevel(), getTensor())))
|
|
|
|
|
return emitError("requested level is out of bounds");
|
|
|
|
|
if (failed(isMatchingWidth(getResult(), e.getPosWidth())))
|
|
|
|
|
return emitError("unexpected type for positions");
|
2022-06-03 16:41:02 -07:00
|
|
|
return success();
|
2021-05-10 10:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
LogicalResult ToCoordinatesOp::verify() {
|
2022-06-28 13:11:25 -07:00
|
|
|
auto e = getSparseTensorEncoding(getTensor().getType());
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (failed(lvlIsInBounds(getLevel(), getTensor())))
|
|
|
|
|
return emitError("requested level is out of bounds");
|
|
|
|
|
if (failed(isMatchingWidth(getResult(), e.getCrdWidth())))
|
|
|
|
|
return emitError("unexpected type for coordinates");
|
2022-06-03 16:41:02 -07:00
|
|
|
return success();
|
2021-05-10 10:34:21 -07:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
/// Verifies a `ToCoordinatesBufferOp`: the tensor's encoding must have a COO
/// region, i.e. `getCOOStart` must yield a level strictly below the
/// level-rank.
LogicalResult ToCoordinatesBufferOp::verify() {
  auto enc = getSparseTensorEncoding(getTensor().getType());
  const bool hasCOORegion = getCOOStart(enc) < enc.getLvlRank();
  if (hasCOORegion)
    return success();
  return emitError("expected sparse tensor with a COO region");
}
|
|
|
|
|
|
2022-02-02 10:18:06 -08:00
|
|
|
/// Verifier for `ToValuesOp`: the element type of the tensor operand must be
/// identical to the element type of the resulting memref.
LogicalResult ToValuesOp::verify() {
  const Type tensorElemTp = getRankedTensorType(getTensor()).getElementType();
  const Type bufferElemTp = getMemRefType(getResult()).getElementType();
  if (tensorElemTp == bufferElemTp)
    return success();
  return emitError("unexpected mismatch in element types");
}
|
|
|
|
|
|
2023-01-10 23:46:45 +00:00
|
|
|
/// Verifier for `ToSliceOffsetOp`: the requested dimension must lie in the
/// half-open range [0, rank) of the slice's ranked tensor type.
LogicalResult ToSliceOffsetOp::verify() {
  const auto sliceRank = getRankedTensorType(getSlice()).getRank();
  const auto dim = getDim().getSExtValue();
  const bool inBounds = dim >= 0 && dim < sliceRank;
  if (!inBounds)
    return emitError("requested dimension out of bound");
  return success();
}
|
|
|
|
|
|
|
|
|
|
/// Verifier for `ToSliceStrideOp`: the requested dimension must lie in the
/// half-open range [0, rank) of the slice's ranked tensor type.
LogicalResult ToSliceStrideOp::verify() {
  const auto sliceRank = getRankedTensorType(getSlice()).getRank();
  const auto dim = getDim().getSExtValue();
  const bool inBounds = dim >= 0 && dim < sliceRank;
  if (!inBounds)
    return emitError("requested dimension out of bound");
  return success();
}
|
|
|
|
|
|
2022-12-13 19:49:43 +00:00
|
|
|
LogicalResult GetStorageSpecifierOp::verify() {
|
2023-01-18 18:22:48 -08:00
|
|
|
RETURN_FAILURE_IF_FAILED(verifySparsifierGetterSetter(
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
getSpecifierKind(), getLevel(), getSpecifier(), getOperation()))
|
2022-12-13 19:49:43 +00:00
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-15 23:04:17 +00:00
|
|
|
template <typename SpecifierOp>
|
|
|
|
|
static SetStorageSpecifierOp getSpecifierSetDef(SpecifierOp op) {
|
|
|
|
|
return op.getSpecifier().template getDefiningOp<SetStorageSpecifierOp>();
|
|
|
|
|
}
|
|
|
|
|
|
2023-01-10 21:27:18 +01:00
|
|
|
OpFoldResult GetStorageSpecifierOp::fold(FoldAdaptor adaptor) {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const StorageSpecifierKind kind = getSpecifierKind();
|
|
|
|
|
const auto lvl = getLevel();
|
2022-12-15 23:04:17 +00:00
|
|
|
for (auto op = getSpecifierSetDef(*this); op; op = getSpecifierSetDef(op))
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
if (kind == op.getSpecifierKind() && lvl == op.getLevel())
|
2022-12-15 23:04:17 +00:00
|
|
|
return op.getValue();
|
|
|
|
|
return {};
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-13 19:49:43 +00:00
|
|
|
LogicalResult SetStorageSpecifierOp::verify() {
|
2023-01-18 18:22:48 -08:00
|
|
|
RETURN_FAILURE_IF_FAILED(verifySparsifierGetterSetter(
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
getSpecifierKind(), getLevel(), getSpecifier(), getOperation()))
|
2022-12-13 19:49:43 +00:00
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-17 12:16:29 -05:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
// TensorDialect Linalg.Generic Operations.
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
|
|
template <class T>
|
|
|
|
|
static LogicalResult verifyNumBlockArgs(T *op, Region ®ion,
|
|
|
|
|
const char *regionName,
|
|
|
|
|
TypeRange inputTypes, Type outputType) {
|
|
|
|
|
unsigned numArgs = region.getNumArguments();
|
|
|
|
|
unsigned expectedNum = inputTypes.size();
|
|
|
|
|
if (numArgs != expectedNum)
|
|
|
|
|
return op->emitError() << regionName << " region must have exactly "
|
|
|
|
|
<< expectedNum << " arguments";
|
|
|
|
|
|
|
|
|
|
for (unsigned i = 0; i < numArgs; i++) {
|
|
|
|
|
Type typ = region.getArgument(i).getType();
|
|
|
|
|
if (typ != inputTypes[i])
|
|
|
|
|
return op->emitError() << regionName << " region argument " << (i + 1)
|
|
|
|
|
<< " type mismatch";
|
|
|
|
|
}
|
|
|
|
|
Operation *term = region.front().getTerminator();
|
|
|
|
|
YieldOp yield = dyn_cast<YieldOp>(term);
|
|
|
|
|
if (!yield)
|
|
|
|
|
return op->emitError() << regionName
|
|
|
|
|
<< " region must end with sparse_tensor.yield";
|
2022-09-22 21:53:48 +00:00
|
|
|
if (!yield.getResult() || yield.getResult().getType() != outputType)
|
2022-03-17 12:16:29 -05:00
|
|
|
return op->emitError() << regionName << " region yield type mismatch";
|
|
|
|
|
|
|
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Verifier for `BinaryOp`.
///
/// For each non-empty region, checks the block arguments and the yielded
/// type via `verifyNumBlockArgs`:
///   * "overlap" takes (type of x, type of y),
///   * "left" takes (type of x),
///   * "right" takes (type of y),
/// and every region must yield the op's output type. When the left (right)
/// region is empty but the corresponding identity flag is set, the type of
/// x (y) must equal the output type.
LogicalResult BinaryOp::verify() {
  Type leftType = getX().getType();
  Type rightType = getY().getType();
  Type outputType = getOutput().getType();
  Region &overlap = getOverlapRegion();
  Region &left = getLeftRegion();
  Region &right = getRightRegion();

  // Check correct number of block arguments and return type for each
  // non-empty region.
  if (!overlap.empty()) {
    RETURN_FAILURE_IF_FAILED(verifyNumBlockArgs(
        this, overlap, "overlap", TypeRange{leftType, rightType}, outputType))
  }
  if (!left.empty()) {
    RETURN_FAILURE_IF_FAILED(
        verifyNumBlockArgs(this, left, "left", TypeRange{leftType}, outputType))
  } else if (getLeftIdentity()) {
    // With no region, the identity passes x through unchanged.
    if (leftType != outputType)
      return emitError("left=identity requires first argument to have the same "
                       "type as the output");
  }
  if (!right.empty()) {
    RETURN_FAILURE_IF_FAILED(verifyNumBlockArgs(
        this, right, "right", TypeRange{rightType}, outputType))
  } else if (getRightIdentity()) {
    // With no region, the identity passes y through unchanged.
    if (rightType != outputType)
      return emitError("right=identity requires second argument to have the "
                       "same type as the output");
  }
  return success();
}
|
|
|
|
|
|
|
|
|
|
/// Verifier for `UnaryOp`.
///
/// Each non-empty region is checked with `verifyNumBlockArgs`: the "present"
/// region takes a single argument of the input's type, the "absent" region
/// takes no arguments, and both must yield the op's output type.
LogicalResult UnaryOp::verify() {
  Type inTp = getX().getType();
  Type outTp = getOutput().getType();

  // Region taking the input value as its only block argument.
  Region &presentRegion = getPresentRegion();
  if (!presentRegion.empty()) {
    RETURN_FAILURE_IF_FAILED(verifyNumBlockArgs(
        this, presentRegion, "present", TypeRange{inTp}, outTp))
  }

  // Region taking no block arguments at all.
  Region &absentRegion = getAbsentRegion();
  if (!absentRegion.empty()) {
    RETURN_FAILURE_IF_FAILED(
        verifyNumBlockArgs(this, absentRegion, "absent", TypeRange{}, outTp))
  }

  return success();
}
|
|
|
|
|
|
2022-08-03 19:23:42 +00:00
|
|
|
LogicalResult ConcatenateOp::verify() {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto dstTp = getSparseTensorType(*this);
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where it is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypedValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const Dimension concatDim = getDimension();
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const Dimension dimRank = dstTp.getDimRank();
|
2022-08-03 19:23:42 +00:00
|
|
|
|
|
|
|
|
if (getInputs().size() <= 1)
|
|
|
|
|
return emitError("Need at least two tensors to concatenate.");
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (concatDim >= dimRank)
|
2022-08-03 19:23:42 +00:00
|
|
|
return emitError(llvm::formatv(
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
"Concat-dimension is out of bounds for dimension-rank ({0} >= {1})",
|
|
|
|
|
concatDim, dimRank));
|
|
|
|
|
|
|
|
|
|
for (const auto &it : llvm::enumerate(getInputs())) {
|
|
|
|
|
const auto i = it.index();
|
|
|
|
|
const auto srcTp = getSparseTensorType(it.value());
|
|
|
|
|
if (srcTp.hasDynamicDimShape())
|
|
|
|
|
return emitError(llvm::formatv("Input tensor ${0} has dynamic shape", i));
|
|
|
|
|
const Dimension srcDimRank = srcTp.getDimRank();
|
|
|
|
|
if (srcDimRank != dimRank)
|
2022-08-03 19:23:42 +00:00
|
|
|
return emitError(
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
llvm::formatv("Input tensor ${0} has a different rank (rank={1}) "
|
2022-08-03 19:23:42 +00:00
|
|
|
"from the output tensor (rank={2}).",
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
i, srcDimRank, dimRank));
|
2022-08-03 19:23:42 +00:00
|
|
|
}
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
for (Dimension d = 0; d < dimRank; d++) {
|
|
|
|
|
const DynSize dstSh = dstTp.getDimShape()[d];
|
|
|
|
|
if (d == concatDim) {
|
|
|
|
|
if (!ShapedType::isDynamic(dstSh)) {
|
|
|
|
|
// If we reach here, then all inputs have static shapes. So we
|
|
|
|
|
// can use `getDimShape()[d]` instead of `*getDynamicDimSize(d)`
|
|
|
|
|
// to avoid redundant assertions in the loop.
|
|
|
|
|
StaticSize sumSz = 0;
|
|
|
|
|
for (const auto src : getInputs())
|
|
|
|
|
sumSz += getSparseTensorType(src).getDimShape()[d];
|
2022-08-03 19:23:42 +00:00
|
|
|
// If all dimension are statically known, the sum of all the input
|
|
|
|
|
// dimensions should be equal to the output dimension.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (sumSz != dstSh)
|
2022-08-03 19:23:42 +00:00
|
|
|
return emitError(
|
|
|
|
|
"The concatenation dimension of the output tensor should be the "
|
|
|
|
|
"sum of all the concatenation dimensions of the input tensors.");
|
|
|
|
|
}
|
|
|
|
|
} else {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
DynSize prev = dstSh;
|
|
|
|
|
for (const auto src : getInputs()) {
|
|
|
|
|
const auto sh = getSparseTensorType(src).getDimShape()[d];
|
|
|
|
|
if (!ShapedType::isDynamic(prev) && sh != prev)
|
2022-08-03 19:23:42 +00:00
|
|
|
return emitError("All dimensions (expect for the concatenating one) "
|
|
|
|
|
"should be equal.");
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
prev = sh;
|
2022-08-03 19:23:42 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-21 17:40:50 -07:00
|
|
|
LogicalResult InsertOp::verify() {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const auto stt = getSparseTensorType(getTensor());
|
|
|
|
|
if (stt.getLvlRank() != static_cast<Level>(getLvlCoords().size()))
|
|
|
|
|
return emitOpError("incorrect number of coordinates");
|
2022-09-21 17:40:50 -07:00
|
|
|
return success();
|
|
|
|
|
}
|
2022-10-26 08:42:44 -07:00
|
|
|
|
|
|
|
|
/// Convenience builder that forwards to the builder overload taking one
/// additional trailing `Value`, passing a default-constructed (null) `Value`
/// for it.
void PushBackOp::build(OpBuilder &builder, OperationState &result,
                       Value curSize, Value inBuffer, Value value) {
  // Null value for the trailing operand that this overload does not expose.
  const Value absent;
  build(builder, result, curSize, inBuffer, value, absent);
}
|
|
|
|
|
|
|
|
|
|
/// Verifies the `n` operand when it is present and defined by an
/// `arith::ConstantIndexOp`: its constant value must be at least 1.
/// An absent `n`, or one that is not such a constant, is accepted as-is.
LogicalResult PushBackOp::verify() {
  Value count = getN();
  // Nothing to check when `n` was not provided.
  if (!count)
    return success();

  // Only a statically known constant can be range-checked here.
  auto constCount =
      dyn_cast_or_null<arith::ConstantIndexOp>(count.getDefiningOp());
  if (!constCount || !(constCount.value() < 1))
    return success();

  return emitOpError("n must be not less than 1");
}
|
2022-09-21 17:40:50 -07:00
|
|
|
|
|
|
|
|
LogicalResult CompressOp::verify() {
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
const auto stt = getSparseTensorType(getTensor());
|
|
|
|
|
if (stt.getLvlRank() != 1 + static_cast<Level>(getLvlCoords().size()))
|
|
|
|
|
return emitOpError("incorrect number of coordinates");
|
2022-09-21 17:40:50 -07:00
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-09-29 18:11:56 +00:00
|
|
|
void ForeachOp::build(
|
|
|
|
|
OpBuilder &builder, OperationState &result, Value tensor,
|
2023-02-16 20:24:01 +00:00
|
|
|
ValueRange initArgs, AffineMapAttr order,
|
2022-11-04 17:52:21 +00:00
|
|
|
function_ref<void(OpBuilder &, Location, ValueRange, Value, ValueRange)>
|
|
|
|
|
bodyBuilder) {
|
2023-02-16 20:24:01 +00:00
|
|
|
build(builder, result, initArgs.getTypes(), tensor, initArgs, order);
|
2022-11-04 17:52:21 +00:00
|
|
|
// Builds foreach body.
|
2022-09-29 18:11:56 +00:00
|
|
|
if (!bodyBuilder)
|
|
|
|
|
return;
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto stt = getSparseTensorType(tensor);
|
|
|
|
|
const Dimension dimRank = stt.getDimRank();
|
2022-09-29 18:11:56 +00:00
|
|
|
|
[mlir][sparse] Renaming "pointer/index" to "position/coordinate"
The old "pointer/index" names often cause confusion since these names clash with names of unrelated things in MLIR; so this change rectifies this by changing everything to use "position/coordinate" terminology instead.
In addition to the basic terminology, there have also been various conventions for making certain distinctions like: (1) the overall storage for coordinates in the sparse-tensor, vs the particular collection of coordinates of a given element; and (2) particular coordinates given as a `Value` or `TypedValue<MemRefType>`, vs particular coordinates given as `ValueRange` or similar. I have striven to maintain these distinctions
as follows:
* "p/c" are used for individual position/coordinate values, when there is no risk of confusion. (Just like we use "d/l" to abbreviate "dim/lvl".)
* "pos/crd" are used for individual position/coordinate values, when a longer name is helpful to avoid ambiguity or to form compound names (e.g., "parentPos"). (Just like we use "dim/lvl" when we need a longer form of "d/l".)
I have also used these forms for a handful of compound names where the old name had been using a three-letter form previously, even though a longer form would be more appropriate. I've avoided renaming these to use a longer form purely for expediency sake, since changing them would require a cascade of other renamings. They should be updated to follow the new naming scheme, but that can be done in future patches.
* "coords" is used for the complete collection of crd values associated with a single element. In the runtime library this includes both `std::vector` and raw pointer representations. In the compiler, this is used specifically for buffer variables with C++ type `Value`, `TypedValue<MemRefType>`, etc.
The bare form "coords" is discouraged, since it fails to make the dim/lvl distinction; so the compound names "dimCoords/lvlCoords" should be used instead. (Though there may exist a rare few cases where is is appropriate to be intentionally ambiguous about what coordinate-space the coords live in; in which case the bare "coords" is appropriate.)
There is seldom the need for the pos variant of this notion. In most circumstances we use the term "cursor", since the same buffer is reused for a 'moving' pos-collection.
* "dcvs/lcvs" is used in the compiler as the `ValueRange` analogue of "dimCoords/lvlCoords". (The "vs" stands for "`Value`s".) I haven't found the need for it, but "pvs" would be the obvious name for a pos-`ValueRange`.
The old "ind"-vs-"ivs" naming scheme does not seem to have been sustained in more recent code, which instead prefers other mnemonics (e.g., adding "Buf" to the end of the names for `TypeValue<MemRefType>`). I have cleaned up a lot of these to follow the "coords"-vs-"cvs" naming scheme, though haven't done an exhaustive cleanup.
* "positions/coordinates" are used for larger collections of pos/crd values; in particular, these are used when referring to the complete sparse-tensor storage components.
I also prefer to use these unabbreviated names in the documentation, unless there is some specific reason why using the abbreviated forms helps resolve ambiguity.
In addition to making this terminology change, this change also does some cleanup along the way:
* correcting the dim/lvl terminology in certain places.
* adding `const` when it requires no other code changes.
* miscellaneous cleanup that was entailed in order to make the proper distinctions. Most of these are in CodegenUtils.{h,cpp}
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D144773
2023-03-06 12:19:41 -08:00
|
|
|
// Starts with `dimRank`-many coordinates.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SmallVector<Type> blockArgTypes(dimRank, builder.getIndexType());
|
2022-09-29 18:11:56 +00:00
|
|
|
// Followed by one value.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
blockArgTypes.push_back(stt.getElementType());
|
|
|
|
|
// Followed by the reduction variables.
|
2022-11-04 17:52:21 +00:00
|
|
|
blockArgTypes.append(initArgs.getTypes().begin(), initArgs.getTypes().end());
|
2022-09-29 18:11:56 +00:00
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
SmallVector<Location> blockArgLocs(blockArgTypes.size(), tensor.getLoc());
|
2022-09-29 18:11:56 +00:00
|
|
|
|
|
|
|
|
OpBuilder::InsertionGuard guard(builder);
|
|
|
|
|
auto ®ion = *result.regions.front();
|
|
|
|
|
Block *bodyBlock =
|
|
|
|
|
builder.createBlock(®ion, region.end(), blockArgTypes, blockArgLocs);
|
2022-11-04 17:52:21 +00:00
|
|
|
bodyBuilder(builder, result.location,
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
bodyBlock->getArguments().slice(0, dimRank),
|
|
|
|
|
bodyBlock->getArguments()[dimRank],
|
|
|
|
|
bodyBlock->getArguments().drop_front(dimRank + 1));
|
2022-09-29 18:11:56 +00:00
|
|
|
}
|
|
|
|
|
|
2022-09-22 21:53:48 +00:00
|
|
|
LogicalResult ForeachOp::verify() {
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto t = getSparseTensorType(getTensor());
|
|
|
|
|
const Dimension dimRank = t.getDimRank();
|
|
|
|
|
const auto args = getBody()->getArguments();
|
2022-09-22 21:53:48 +00:00
|
|
|
|
2023-02-16 20:24:01 +00:00
|
|
|
if (getOrder().has_value() &&
|
|
|
|
|
(t.getEncoding() || !getOrder()->isPermutation()))
|
|
|
|
|
return emitError("Only support permuted order on non encoded dense tensor");
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (static_cast<size_t>(dimRank) + 1 + getInitArgs().size() != args.size())
|
2022-09-22 21:53:48 +00:00
|
|
|
return emitError("Unmatched number of arguments in the block");
|
|
|
|
|
|
2022-11-04 17:52:21 +00:00
|
|
|
if (getNumResults() != getInitArgs().size())
|
|
|
|
|
return emitError("Mismatch in number of init arguments and results");
|
|
|
|
|
|
|
|
|
|
if (getResultTypes() != getInitArgs().getTypes())
|
|
|
|
|
return emitError("Mismatch in types of init arguments and results");
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
// Cannot mark this const, because the getters aren't.
|
2022-11-04 17:52:21 +00:00
|
|
|
auto yield = cast<YieldOp>(getBody()->getTerminator());
|
|
|
|
|
if (yield.getNumOperands() != getNumResults() ||
|
|
|
|
|
yield.getOperands().getTypes() != getResultTypes())
|
|
|
|
|
return emitError("Mismatch in types of yield values and results");
|
|
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto iTp = IndexType::get(getContext());
|
|
|
|
|
for (Dimension d = 0; d < dimRank; d++)
|
|
|
|
|
if (args[d].getType() != iTp)
|
2022-09-22 21:53:48 +00:00
|
|
|
emitError(
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
llvm::formatv("Expecting Index type for argument at index {0}", d));
|
2022-09-22 21:53:48 +00:00
|
|
|
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const auto elemTp = t.getElementType();
|
|
|
|
|
const auto valueTp = args[dimRank].getType();
|
2022-09-22 21:53:48 +00:00
|
|
|
if (elemTp != valueTp)
|
|
|
|
|
emitError(llvm::formatv("Unmatched element type between input tensor and "
|
|
|
|
|
"block argument, expected:{0}, got: {1}",
|
|
|
|
|
elemTp, valueTp));
|
|
|
|
|
return success();
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-15 15:26:41 -05:00
|
|
|
/// Verifier for the reduce op. Checks the number of block arguments and the
/// return type of the op's region by delegating to `verifyNumBlockArgs`, with
/// two inputs and one output that all have the type of operand `x`.
LogicalResult ReduceOp::verify() {
Region &reduceRegion = getRegion();
// The type of `x` is used for both region inputs and for the region output.
Type operandTp = getX().getType();
// The TypeRange is built inline on purpose: it only views the listed types.
RETURN_FAILURE_IF_FAILED(
verifyNumBlockArgs(this, reduceRegion, "reduce",
TypeRange{operandTp, operandTp}, operandTp))
return success();
}
|
|
|
|
|
|
|
|
|
|
/// Verifier for the select op. Checks the number of block arguments and the
/// return type of the op's region by delegating to `verifyNumBlockArgs`, with
/// a single input of the type of operand `x` and an i1 output.
LogicalResult SelectOp::verify() {
Region &selectRegion = getRegion();
Type operandTp = getX().getType();
// The region is expected to produce an i1 value.
Type i1Tp = Builder(getContext()).getI1Type();
// The TypeRange is built inline on purpose: it only views the listed types.
RETURN_FAILURE_IF_FAILED(verifyNumBlockArgs(
this, selectRegion, "select", TypeRange{operandTp}, i1Tp))
return success();
}
|
|
|
|
|
|
2022-09-27 11:45:02 -07:00
|
|
|
LogicalResult SortOp::verify() {
|
|
|
|
|
if (getXs().empty())
|
|
|
|
|
return emitError("need at least one xs buffer.");
|
|
|
|
|
|
|
|
|
|
auto n = getN().getDefiningOp<arith::ConstantIndexOp>();
|
|
|
|
|
|
2023-01-24 13:23:52 -08:00
|
|
|
Type xtp = getMemRefType(getXs().front()).getElementType();
|
2022-09-27 11:45:02 -07:00
|
|
|
auto checkTypes = [&](ValueRange operands,
|
|
|
|
|
bool checkEleType = true) -> LogicalResult {
|
|
|
|
|
for (Value opnd : operands) {
|
2023-01-24 13:23:52 -08:00
|
|
|
auto mtp = getMemRefType(opnd);
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
const DynSize sh = mtp.getShape()[0];
|
2022-09-27 11:45:02 -07:00
|
|
|
// We can't check the size of dynamic dimension at compile-time, but all
|
|
|
|
|
// xs and ys should have a dimension not less than n at runtime.
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
if (n && !ShapedType::isDynamic(sh) && sh < n.value())
|
2022-09-27 11:45:02 -07:00
|
|
|
return emitError(llvm::formatv("xs and ys need to have a dimension >= n"
|
|
|
|
|
": {0} < {1}",
|
[mlir][sparse] Factoring out SparseTensorType class
This change adds a new `SparseTensorType` class for making the "dim" vs "lvl" distinction more overt, and for abstracting over the differences between sparse-tensors and dense-tensors. In addition, this change also adds new type aliases `Dimension`, `Level`, and `FieldIndex` to make code more self-documenting.
Although the diff is very large, the majority of the changes are mechanical in nature (e.g., changing types to use the new aliases, updating variable names to match, etc). Along the way I also made many variables `const` when they could be; the majority of which required only adding the keyword. A few places had conditional definitions of these variables, requiring actual code changes; however, that was only done when the overall change was extremely local and easy to extract. All these changes are included in the current patch only because it would be too onerous to split them off into a separate patch.
Reviewed By: aartbik
Differential Revision: https://reviews.llvm.org/D143800
2023-02-14 18:20:45 -08:00
|
|
|
sh, n.value()));
|
2022-09-27 11:45:02 -07:00
|
|
|
|
|
|
|
|
if (checkEleType && xtp != mtp.getElementType())
|
|
|
|
|
return emitError("mismatch xs element types");
|
|
|
|
|
}
|
|
|
|
|
return success();
|
|
|
|
|
};
|
2023-01-18 18:22:48 -08:00
|
|
|
RETURN_FAILURE_IF_FAILED(checkTypes(getXs()))
|
|
|
|
|
return n ? checkTypes(getYs(), false) : success();
|
2022-09-27 11:45:02 -07:00
|
|
|
}
|
|
|
|
|
|
2022-11-07 08:18:53 -08:00
|
|
|
/// Verifier for the sort_coo op.
///
/// Nothing is checked unless `n` is defined by an `arith::ConstantIndexOp`.
/// Otherwise:
///   * the optional `nx` attribute (default 1) must be >= 1;
///   * a static leading dimension of `xy` must be >= n * (nx + ny), where the
///     optional `ny` attribute defaults to 0;
///   * a static leading dimension of each ys buffer must be >= n.
///
/// Returns failure as soon as one check emits an error, so that a reported
/// diagnostic is never paired with a successful verification result.
LogicalResult SortCooOp::verify() {
auto cn = getN().getDefiningOp<arith::ConstantIndexOp>();
// We can't check the size of the buffers when n or buffer dimensions aren't
// compile-time constants.
if (!cn)
return success();

const uint64_t n = cn.value();

uint64_t nx = 1;
if (auto nxAttr = getNxAttr()) {
// Test the signed attribute value before converting to unsigned, so that
// a negative value is rejected instead of wrapping to a huge count.
const int64_t nxValue = nxAttr.getInt();
// NOTE(review): the message text says "> 1" although nx == 1 is accepted;
// it is left unchanged because diagnostics tests may match on it.
if (nxValue < 1)
return emitError(llvm::formatv("Expected nx > 1, got {0}", nxValue));
nx = static_cast<uint64_t>(nxValue);
}

uint64_t ny = 0;
if (auto nyAttr = getNyAttr())
ny = nyAttr.getInt();

// FIXME: update the types of variables used in expressions passed as
// the `minSize` argument, to avoid implicit casting at the callsites
// of this lambda.
//
// Fails (with a diagnostic) when the leading dimension of `v` is static and
// smaller than `minSize`; dynamic dimensions are accepted unchecked.
const auto checkDim = [&](Value v, StaticSize minSize,
const char *message) -> LogicalResult {
const DynSize sh = getMemRefType(v).getShape()[0];
if (!ShapedType::isDynamic(sh) && sh < minSize)
return emitError(
llvm::formatv("{0} got {1} < {2}", message, sh, minSize));
return success();
};

if (failed(checkDim(getXy(), n * (nx + ny),
"Expected dimension(xy) >= n * (nx + ny)")))
return failure();

for (Value opnd : getYs())
if (failed(checkDim(opnd, n, "Expected dimension(y) >= n")))
return failure();

return success();
}
|
|
|
|
|
|
2022-03-17 12:16:29 -05:00
|
|
|
/// Verifier for the yield op: it is only accepted when its parent operation
/// is a sparse_tensor binary, unary, reduce, select, or foreach op.
LogicalResult YieldOp::verify() {
Operation *enclosingOp = (*this)->getParentOp();
// Reject any parent that is not one of the supported region-holding ops.
if (!isa<BinaryOp, UnaryOp, ReduceOp, SelectOp, ForeachOp>(enclosingOp))
return emitOpError("expected parent op to be sparse_tensor unary, binary, "
"reduce, select or foreach");

return success();
}
|
|
|
|
|
|
2023-01-18 18:22:48 -08:00
|
|
|
#undef RETURN_FAILURE_IF_FAILED
|
|
|
|
|
|
2021-04-30 18:07:28 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
2021-05-10 10:34:21 -07:00
|
|
|
// SparseTensorDialect Methods.
|
2021-04-30 18:07:28 -07:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
2021-04-29 14:31:18 -07:00
|
|
|
/// Registers the dialect's attributes, types, and operations, in that order.
/// Each template argument list is supplied by including a `.cpp.inc` file
/// after defining the matching `GET_*_LIST` macro (presumably the macro
/// selects the class-list section of the generated file; confirm against the
/// TableGen output).
void SparseTensorDialect::initialize() {
|
2021-04-30 18:07:28 -07:00
|
|
|
// Attributes listed in SparseTensorAttrDefs.cpp.inc.
addAttributes<
|
|
|
|
|
#define GET_ATTRDEF_LIST
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc"
|
|
|
|
|
>();
|
2022-12-13 19:49:43 +00:00
|
|
|
// Types listed in SparseTensorTypes.cpp.inc.
addTypes<
|
|
|
|
|
#define GET_TYPEDEF_LIST
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorTypes.cpp.inc"
|
|
|
|
|
>();
|
2021-04-29 14:31:18 -07:00
|
|
|
// Operations listed in SparseTensorOps.cpp.inc.
addOperations<
|
|
|
|
|
#define GET_OP_LIST
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.cpp.inc"
|
|
|
|
|
>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define GET_OP_CLASSES
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorOps.cpp.inc"
|
2022-02-11 21:18:20 -08:00
|
|
|
|
|
|
|
|
#include "mlir/Dialect/SparseTensor/IR/SparseTensorOpsDialect.cpp.inc"
|