[mlir] Split alloc-like op LLVM lowerings into base and separate derived classes.

The previous code did the lowering to alloca, malloc, and aligned_alloc
in a single class with different code paths that are somewhat difficult to
follow.

This change moves the common code to a base class and introduces a separate
derived class per lowering target that contains the specifics.
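
For orientation, a minimal self-contained C++ sketch of the resulting shape follows: the shared lowering steps sit in a base class and each allocation target only overrides a buffer-allocation hook. The names below are schematic stand-ins, not the actual MLIR classes from the diff.

#include <tuple>

// Schematic stand-ins only; the real classes derive from ConvertToLLVMPattern
// and operate on MLIR values and rewriters (see the diff below).
struct Rewriter {};
struct Value {};

struct AllocLikeLoweringSketch {
  virtual ~AllocLikeLoweringSketch() = default;
  // The single target-specific hook: returns (allocated ptr, aligned ptr).
  virtual std::tuple<Value, Value> allocateBuffer(Rewriter &rewriter) const = 0;
  // Shared steps (size computation, memref descriptor construction) would
  // live in non-virtual members of this base class.
};

struct MallocLoweringSketch : AllocLikeLoweringSketch {
  std::tuple<Value, Value> allocateBuffer(Rewriter &) const override {
    return {}; // would call malloc and realign the returned pointer
  }
};

struct AllocaLoweringSketch : AllocLikeLoweringSketch {
  std::tuple<Value, Value> allocateBuffer(Rewriter &) const override {
    return {}; // would emit an llvm.alloca; the pointer is already usable
  }
};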

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D88696
Author: Christian Sigg
Date:   2020-10-01 21:51:54 +02:00
Parent: c3f12dd606
Commit: 665371d0b2
4 changed files with 331 additions and 297 deletions


@@ -412,6 +412,7 @@ public:
LLVMTypeConverter &typeConverter,
PatternBenefit benefit = 1);
protected:
/// Returns the LLVM dialect.
LLVM::LLVMDialect &getDialect() const;
@@ -419,6 +420,10 @@ public:
/// defined by the used type converter.
LLVM::LLVMType getIndexType() const;
/// Gets the MLIR type wrapping the LLVM integer type whose bit width
/// corresponds to that of an LLVM pointer type.
LLVM::LLVMType getIntPtrType(unsigned addressSpace = 0) const;
/// Gets the MLIR type wrapping the LLVM void type.
LLVM::LLVMType getVoidType() const;
@@ -470,6 +475,15 @@ public:
ArrayRef<Value> shape,
ConversionPatternRewriter &rewriter) const;
/// Creates and populates the memref descriptor struct given all its fields.
/// 'strides' can be either dynamic (kDynamicStrideOrOffset) or static, but
/// not a mix of the two.
MemRefDescriptor
createMemRefDescriptor(Location loc, MemRefType memRefType,
Value allocatedPtr, Value alignedPtr, uint64_t offset,
ArrayRef<int64_t> strides, ArrayRef<Value> sizes,
ConversionPatternRewriter &rewriter) const;
protected:
/// Reference to the type converter, with potential extensions.
LLVMTypeConverter &typeConverter;


@@ -872,6 +872,13 @@ LLVM::LLVMType ConvertToLLVMPattern::getIndexType() const {
return typeConverter.getIndexType();
}
LLVM::LLVMType
ConvertToLLVMPattern::getIntPtrType(unsigned addressSpace) const {
return LLVM::LLVMType::getIntNTy(
&typeConverter.getContext(),
typeConverter.getPointerBitwidth(addressSpace));
}
LLVM::LLVMType ConvertToLLVMPattern::getVoidType() const {
return LLVM::LLVMType::getVoidTy(&typeConverter.getContext());
}
@@ -911,12 +918,12 @@ Value ConvertToLLVMPattern::getStridedElementPtr(
Value base = memRefDescriptor.alignedPtr(rewriter, loc);
Value offsetValue = offset == MemRefType::getDynamicStrideOrOffset()
? memRefDescriptor.offset(rewriter, loc)
: this->createIndexConstant(rewriter, loc, offset);
: createIndexConstant(rewriter, loc, offset);
for (int i = 0, e = indices.size(); i < e; ++i) {
Value stride = strides[i] == MemRefType::getDynamicStrideOrOffset()
? memRefDescriptor.stride(rewriter, loc, i)
: this->createIndexConstant(rewriter, loc, strides[i]);
: createIndexConstant(rewriter, loc, strides[i]);
Value additionalOffset =
rewriter.create<LLVM::MulOp>(loc, indices[i], stride);
offsetValue =
@@ -973,19 +980,69 @@ Value ConvertToLLVMPattern::getSizeInBytes(
}
Value ConvertToLLVMPattern::getCumulativeSizeInBytes(
Location loc, Type elementType, ArrayRef<Value> sizes,
Location loc, Type elementType, ArrayRef<Value> shape,
ConversionPatternRewriter &rewriter) const {
// Compute the total number of memref elements.
Value cumulativeSizeInBytes =
sizes.empty() ? createIndexConstant(rewriter, loc, 1) : sizes.front();
for (unsigned i = 1, e = sizes.size(); i < e; ++i)
shape.empty() ? createIndexConstant(rewriter, loc, 1) : shape.front();
for (unsigned i = 1, e = shape.size(); i < e; ++i)
cumulativeSizeInBytes = rewriter.create<LLVM::MulOp>(
loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, sizes[i]});
loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, shape[i]});
auto elementSize = this->getSizeInBytes(loc, elementType, rewriter);
return rewriter.create<LLVM::MulOp>(
loc, getIndexType(), ArrayRef<Value>{cumulativeSizeInBytes, elementSize});
}
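
The value computed here is simply product(shape) * sizeof(element). A standalone C++ check of that arithmetic, with assumed dynamic sizes 5 and 7 for a memref<?x42x?xf32>:

#include <cassert>
#include <cstdint>

int main() {
  // Assumed example values for a memref<?x42x?xf32> with dynamic sizes 5 and
  // 7; an f32 element occupies 4 bytes.
  const int64_t shape[] = {5, 42, 7};
  int64_t numElements = 1;
  for (int64_t dim : shape)
    numElements *= dim; // cumulative element count
  int64_t sizeInBytes = numElements * static_cast<int64_t>(sizeof(float));
  assert(sizeInBytes == 5 * 42 * 7 * 4);
  return 0;
}
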
/// Creates and populates the memref descriptor struct given all its fields.
MemRefDescriptor ConvertToLLVMPattern::createMemRefDescriptor(
Location loc, MemRefType memRefType, Value allocatedPtr, Value alignedPtr,
uint64_t offset, ArrayRef<int64_t> strides, ArrayRef<Value> sizes,
ConversionPatternRewriter &rewriter) const {
auto structType = typeConverter.convertType(memRefType);
auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType);
// Field 1: Allocated pointer, used for malloc/free.
memRefDescriptor.setAllocatedPtr(rewriter, loc, allocatedPtr);
// Field 2: Actual aligned pointer to payload.
memRefDescriptor.setAlignedPtr(rewriter, loc, alignedPtr);
// Field 3: Offset in aligned pointer.
memRefDescriptor.setOffset(rewriter, loc,
createIndexConstant(rewriter, loc, offset));
if (memRefType.getRank() == 0)
// No size/stride descriptor in memref, return the descriptor value.
return memRefDescriptor;
// Fields 4 and 5: sizes and strides of the strided MemRef.
// Store all sizes in the descriptor. Only dynamic sizes are passed in as
// operands to AllocOp.
Value runningStride = nullptr;
// Iterate strides in reverse order, compute runningStride and strideValues.
auto nStrides = strides.size();
SmallVector<Value, 4> strideValues(nStrides, nullptr);
for (unsigned i = 0; i < nStrides; ++i) {
int64_t index = nStrides - 1 - i;
if (strides[index] == MemRefType::getDynamicStrideOrOffset())
// Identity layout map is enforced in the match function, so we compute:
// `runningStride *= sizes[index + 1]`
runningStride = runningStride ? rewriter.create<LLVM::MulOp>(
loc, runningStride, sizes[index + 1])
: createIndexConstant(rewriter, loc, 1);
else
runningStride = createIndexConstant(rewriter, loc, strides[index]);
strideValues[index] = runningStride;
}
// Fill size and stride descriptors in memref.
for (auto indexedSize : llvm::enumerate(sizes)) {
int64_t index = indexedSize.index();
memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value());
memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]);
}
return memRefDescriptor;
}
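
For the identity layouts this lowering accepts, the dynamic-stride branch above reduces to a running product of sizes from the innermost dimension outward. A standalone C++ rendering of that arithmetic, assuming all strides are dynamic (the function name is illustrative only):

#include <cassert>
#include <cstdint>
#include <vector>

// Computes identity-layout strides as a running product of sizes, mirroring
// the reverse iteration over `strides` above.
std::vector<int64_t> identityStrides(const std::vector<int64_t> &sizes) {
  std::vector<int64_t> strides(sizes.size());
  int64_t running = 1;
  for (int64_t i = static_cast<int64_t>(sizes.size()) - 1; i >= 0; --i) {
    strides[i] = running; // runningStride for this dimension
    running *= sizes[i];  // accumulate toward the outer dimensions
  }
  return strides;
}

int main() {
  // A memref<2x3x4xf32> with identity layout has strides [12, 4, 1].
  assert(identityStrides({2, 3, 4}) == std::vector<int64_t>({12, 4, 1}));
  return 0;
}
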
/// Only retain those attributes that are not constructed by
/// `LLVMFuncOp::build`. If `filterArgAttrs` is set, also filter out argument
/// attributes.
@@ -1710,113 +1767,198 @@ static bool isSupportedMemRefType(MemRefType type) {
}
/// Lowering for AllocOp and AllocaOp.
template <typename AllocLikeOp>
struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
using ConvertOpToLLVMPattern<AllocLikeOp>::createIndexConstant;
using ConvertOpToLLVMPattern<AllocLikeOp>::getIndexType;
using ConvertOpToLLVMPattern<AllocLikeOp>::typeConverter;
using ConvertOpToLLVMPattern<AllocLikeOp>::getVoidPtrType;
struct AllocLikeOpLowering : public ConvertToLLVMPattern {
using ConvertToLLVMPattern::createIndexConstant;
using ConvertToLLVMPattern::getIndexType;
using ConvertToLLVMPattern::getVoidPtrType;
using ConvertToLLVMPattern::typeConverter;
explicit AllocLikeOpLowering(LLVMTypeConverter &converter)
: ConvertOpToLLVMPattern<AllocLikeOp>(converter) {}
explicit AllocLikeOpLowering(StringRef opName, LLVMTypeConverter &converter)
: ConvertToLLVMPattern(opName, &converter.getContext(), converter) {}
protected:
// Returns 'input' aligned up to 'alignment'. Computes
// bumped = input + alignment - 1
// aligned = bumped - bumped % alignment
static Value createAligned(ConversionPatternRewriter &rewriter, Location loc,
Value input, Value alignment) {
Value one = createIndexAttrConstant(rewriter, loc, alignment.getType(), 1);
Value bump = rewriter.create<LLVM::SubOp>(loc, alignment, one);
Value bumped = rewriter.create<LLVM::AddOp>(loc, input, bump);
Value mod = rewriter.create<LLVM::URemOp>(loc, bumped, alignment);
return rewriter.create<LLVM::SubOp>(loc, bumped, mod);
}
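
The bump/mod sequence emitted by createAligned is the usual round-up-to-a-multiple idiom; a plain-integer C++ check of the same arithmetic, with example values:

#include <cassert>
#include <cstdint>

// Rounds `input` up to the next multiple of `alignment` (alignment > 0),
// mirroring the bump/mod sequence emitted by createAligned above.
uint64_t alignedUpTo(uint64_t input, uint64_t alignment) {
  uint64_t bumped = input + alignment - 1;
  return bumped - bumped % alignment;
}

int main() {
  assert(alignedUpTo(100, 32) == 128);
  assert(alignedUpTo(128, 32) == 128); // already-aligned values are unchanged
  assert(alignedUpTo(0, 32) == 0);
  return 0;
}
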
// Creates a call to an allocation function with params and casts the
// resulting void pointer to ptrType.
Value createAllocCall(Location loc, StringRef name, Type ptrType,
ArrayRef<Value> params, ModuleOp module,
ConversionPatternRewriter &rewriter) const {
SmallVector<LLVM::LLVMType, 2> paramTypes;
auto allocFuncOp = module.lookupSymbol<LLVM::LLVMFuncOp>(name);
if (!allocFuncOp) {
for (Value param : params)
paramTypes.push_back(param.getType().cast<LLVM::LLVMType>());
auto allocFuncType =
LLVM::LLVMType::getFunctionTy(getVoidPtrType(), paramTypes,
/*isVarArg=*/false);
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(module.getBody());
allocFuncOp = rewriter.create<LLVM::LLVMFuncOp>(rewriter.getUnknownLoc(),
name, allocFuncType);
}
auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFuncOp);
auto allocatedPtr = rewriter
.create<LLVM::CallOp>(loc, getVoidPtrType(),
allocFuncSymbol, params)
.getResult(0);
return rewriter.create<LLVM::BitcastOp>(loc, ptrType, allocatedPtr);
}
/// Allocates the underlying buffer. Returns the allocated pointer and the
/// aligned pointer.
virtual std::tuple<Value, Value>
allocateBuffer(ConversionPatternRewriter &rewriter, Location loc,
Value cumulativeSize, Operation *op) const = 0;
private:
static MemRefType getMemRefResultType(Operation *op) {
return op->getResult(0).getType().cast<MemRefType>();
}
LogicalResult match(Operation *op) const override {
MemRefType memRefType = cast<AllocLikeOp>(op).getType();
MemRefType memRefType = getMemRefResultType(op);
if (isSupportedMemRefType(memRefType))
return success();
int64_t offset;
SmallVector<int64_t, 4> strides;
auto successStrides = getStridesAndOffset(memRefType, strides, offset);
if (failed(successStrides))
if (failed(getStridesAndOffset(memRefType, strides, offset)))
return failure();
// Dynamic strides are ok if they can be deduced from dynamic sizes (which
// is guaranteed when succeeded(successStrides)). Dynamic offset however can
// never be alloc'ed.
// is guaranteed when getStridesAndOffset succeeded. Dynamic offset however
// can never be alloc'ed.
if (offset == MemRefType::getDynamicStrideOrOffset())
return failure();
return success();
}
// Returns bump = (alignment - (input % alignment))% alignment, which is the
// increment necessary to align `input` to `alignment` boundary.
// TODO: this can be made more efficient by just using a single addition
// and two bit shifts: (ptr + align - 1)/align, align is always power of 2.
Value createBumpToAlign(Location loc, OpBuilder b, Value input,
Value alignment) const {
Value modAlign = b.create<LLVM::URemOp>(loc, input, alignment);
Value diff = b.create<LLVM::SubOp>(loc, alignment, modAlign);
Value shift = b.create<LLVM::URemOp>(loc, diff, alignment);
return shift;
// An `alloc` is converted into a definition of a memref descriptor value and
// a call to `malloc` to allocate the underlying data buffer. The memref
// descriptor is of the LLVM structure type where:
// 1. the first element is a pointer to the allocated (typed) data buffer,
// 2. the second element is a pointer to the (typed) payload, aligned to the
// specified alignment,
// 3. the remaining elements serve to store all the sizes and strides of the
// memref using LLVM-converted `index` type.
//
// Alignment is performed by allocating `alignment` more bytes than
// requested and shifting the aligned pointer relative to the allocated
// memory. Note: `alignment - <minimum malloc alignment>` would actually be
// sufficient. If alignment is unspecified, the two pointers are equal.
// An `alloca` is converted into a definition of a memref descriptor value and
// an llvm.alloca to allocate the underlying data buffer.
void rewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
MemRefType memRefType = getMemRefResultType(op);
auto loc = op->getLoc();
// Get actual sizes of the memref as values: static sizes are constant
// values and dynamic sizes are passed to 'alloc' as operands. In case of
// zero-dimensional memref, assume a scalar (size 1).
SmallVector<Value, 4> sizes;
this->getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, sizes);
Value cumulativeSize = this->getCumulativeSizeInBytes(
loc, memRefType.getElementType(), sizes, rewriter);
// Allocate the underlying buffer.
Value allocatedPtr;
Value alignedPtr;
std::tie(allocatedPtr, alignedPtr) =
this->allocateBuffer(rewriter, loc, cumulativeSize, op);
int64_t offset;
SmallVector<int64_t, 4> strides;
auto successStrides = getStridesAndOffset(memRefType, strides, offset);
(void)successStrides;
assert(succeeded(successStrides) && "unexpected non-strided memref");
assert(offset != MemRefType::getDynamicStrideOrOffset() &&
"unexpected dynamic offset");
// 0-D memref corner case: they have size 1.
assert(
((memRefType.getRank() == 0 && strides.empty() && sizes.size() == 1) ||
(strides.size() == sizes.size())) &&
"unexpected number of strides");
// Create the MemRef descriptor.
auto memRefDescriptor =
this->createMemRefDescriptor(loc, memRefType, allocatedPtr, alignedPtr,
offset, strides, sizes, rewriter);
// Return the final value of the descriptor.
rewriter.replaceOp(op, {memRefDescriptor});
}
};
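
As a concrete picture of what the shared rewrite builds, the lowered descriptor for a rank-2 memref<?x?xf32> corresponds to the following C++ layout (field names are descriptive only; compare the struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)> types in the tests below):

#include <cstdint>

// Illustration only: layout of the lowered descriptor for a rank-2
// memref<?x?xf32>; rank-0 memrefs drop the sizes/strides arrays.
struct MemRefDescriptor2D {
  float *allocated;   // field 1: pointer returned by malloc/alloca, used by free
  float *aligned;     // field 2: payload pointer, adjusted to the requested alignment
  int64_t offset;     // field 3: offset into the aligned pointer, in elements
  int64_t sizes[2];   // field 4: size of each dimension
  int64_t strides[2]; // field 5: stride of each dimension, in elements
};

int main() {
  MemRefDescriptor2D d{};
  return d.offset == 0 ? 0 : 1;
}
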
/// Creates and populates the memref descriptor struct given all its fields.
/// This method also performs any post allocation alignment needed for heap
/// allocations when `accessAlignment` is non null. This is used with
/// allocators that do not support alignment.
MemRefDescriptor createMemRefDescriptor(
Location loc, ConversionPatternRewriter &rewriter, MemRefType memRefType,
Value allocatedTypePtr, Value allocatedBytePtr, Value accessAlignment,
uint64_t offset, ArrayRef<int64_t> strides, ArrayRef<Value> sizes) const {
auto elementPtrType = this->getElementPtrType(memRefType);
auto structType = typeConverter.convertType(memRefType);
auto memRefDescriptor = MemRefDescriptor::undef(rewriter, loc, structType);
struct AllocOpLowering : public AllocLikeOpLowering {
AllocOpLowering(LLVMTypeConverter &converter)
: AllocLikeOpLowering(AllocOp::getOperationName(), converter) {}
// Field 1: Allocated pointer, used for malloc/free.
memRefDescriptor.setAllocatedPtr(rewriter, loc, allocatedTypePtr);
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value cumulativeSize,
Operation *op) const override {
// Heap allocations.
AllocOp allocOp = cast<AllocOp>(op);
MemRefType memRefType = allocOp.getType();
// Field 2: Actual aligned pointer to payload.
Value alignedBytePtr = allocatedTypePtr;
if (accessAlignment) {
// offset = (align - (ptr % align))% align
Value intVal = rewriter.create<LLVM::PtrToIntOp>(
loc, this->getIndexType(), allocatedBytePtr);
Value offset = createBumpToAlign(loc, rewriter, intVal, accessAlignment);
Value aligned = rewriter.create<LLVM::GEPOp>(
loc, allocatedBytePtr.getType(), allocatedBytePtr, offset);
alignedBytePtr = rewriter.create<LLVM::BitcastOp>(
loc, elementPtrType, ArrayRef<Value>(aligned));
Value alignment;
if (auto alignmentAttr = allocOp.alignment()) {
alignment = createIndexConstant(rewriter, loc, *alignmentAttr);
} else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) {
// In the case where no alignment is specified, we may want to override
// `malloc's` behavior. `malloc` typically aligns at the size of the
// biggest scalar on a target HW. For non-scalars, use the natural
// alignment of the LLVM type given by the LLVM DataLayout.
alignment = getSizeInBytes(loc, memRefType.getElementType(), rewriter);
}
memRefDescriptor.setAlignedPtr(rewriter, loc, alignedBytePtr);
// Field 3: Offset in aligned pointer.
memRefDescriptor.setOffset(rewriter, loc,
createIndexConstant(rewriter, loc, offset));
if (memRefType.getRank() == 0)
// No size/stride descriptor in memref, return the descriptor value.
return memRefDescriptor;
// Fields 4 and 5: sizes and strides of the strided MemRef.
// Store all sizes in the descriptor. Only dynamic sizes are passed in as
// operands to AllocOp.
Value runningStride = nullptr;
// Iterate strides in reverse order, compute runningStride and strideValues.
auto nStrides = strides.size();
SmallVector<Value, 4> strideValues(nStrides, nullptr);
for (unsigned i = 0; i < nStrides; ++i) {
int64_t index = nStrides - 1 - i;
if (strides[index] == MemRefType::getDynamicStrideOrOffset())
// Identity layout map is enforced in the match function, so we compute:
// `runningStride *= sizes[index + 1]`
runningStride = runningStride
? rewriter.create<LLVM::MulOp>(loc, runningStride,
sizes[index + 1])
: createIndexConstant(rewriter, loc, 1);
else
runningStride = createIndexConstant(rewriter, loc, strides[index]);
strideValues[index] = runningStride;
if (alignment) {
// Adjust the allocation size to consider alignment.
cumulativeSize =
rewriter.create<LLVM::AddOp>(loc, cumulativeSize, alignment);
}
// Fill size and stride descriptors in memref.
for (auto indexedSize : llvm::enumerate(sizes)) {
int64_t index = indexedSize.index();
memRefDescriptor.setSize(rewriter, loc, index, indexedSize.value());
memRefDescriptor.setStride(rewriter, loc, index, strideValues[index]);
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
Type elementPtrType = this->getElementPtrType(memRefType);
Value allocatedPtr =
createAllocCall(loc, "malloc", elementPtrType, {cumulativeSize},
allocOp.getParentOfType<ModuleOp>(), rewriter);
Value alignedPtr = allocatedPtr;
if (alignment) {
auto intPtrType = getIntPtrType(memRefType.getMemorySpace());
// Compute the aligned type pointer.
Value allocatedInt =
rewriter.create<LLVM::PtrToIntOp>(loc, intPtrType, allocatedPtr);
Value alignmentInt =
createAligned(rewriter, loc, allocatedInt, alignment);
alignedPtr =
rewriter.create<LLVM::IntToPtrOp>(loc, elementPtrType, alignmentInt);
}
return memRefDescriptor;
return std::make_tuple(allocatedPtr, alignedPtr);
}
};
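
The malloc path above over-allocates by `alignment` bytes and bumps the payload pointer to the next aligned address while keeping the original pointer for free. The same trick in plain C++, with no MLIR involved (illustration only):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Returns an `alignment`-aligned pointer into an over-allocated buffer and
// reports the raw allocation through `allocatedOut` so it can be freed later.
void *allocateAligned(std::size_t size, std::size_t alignment,
                      void **allocatedOut) {
  void *allocated = std::malloc(size + alignment); // extra room for realignment
  *allocatedOut = allocated;
  std::uintptr_t raw = reinterpret_cast<std::uintptr_t>(allocated);
  std::uintptr_t bumped = raw + alignment - 1;
  return reinterpret_cast<void *>(bumped - bumped % alignment);
}

int main() {
  void *allocated = nullptr;
  void *aligned = allocateAligned(100, 64, &allocated);
  assert(reinterpret_cast<std::uintptr_t>(aligned) % 64 == 0);
  std::free(allocated); // free the pointer stored in the descriptor's first field
  return 0;
}
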
struct AlignedAllocOpLowering : public AllocLikeOpLowering {
AlignedAllocOpLowering(LLVMTypeConverter &converter)
: AllocLikeOpLowering(AllocOp::getOperationName(), converter) {}
/// Returns the memref's element size in bytes.
// TODO: there are other places where this is used. Expose publicly?
@@ -1834,25 +1976,6 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
return llvm::divideCeil(sizeInBits, 8);
}
/// Returns the alignment to be used for the allocation call itself.
/// aligned_alloc requires the alignment to be a power of two, and the
/// allocation size to be a multiple of the alignment.
Optional<int64_t> getAllocationAlignment(AllocOp allocOp) const {
// No alignment can be used for the 'malloc' call itself.
if (!typeConverter.getOptions().useAlignedAlloc)
return None;
if (Optional<uint64_t> alignment = allocOp.alignment())
return *alignment;
// Whenever we don't have alignment set, we will use an alignment
// consistent with the element type; since the alignment has to be a
// power of two, we bump it to the next power of two if it is not one already.
auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType());
return std::max(kMinAlignedAllocAlignment,
llvm::PowerOf2Ceil(eltSizeBytes));
}
/// Returns true if the memref size in bytes is known to be a multiple of
/// factor.
static bool isMemRefSizeMultipleOf(MemRefType type, uint64_t factor) {
@@ -1865,171 +1988,72 @@ struct AllocLikeOpLowering : public ConvertOpToLLVMPattern<AllocLikeOp> {
return sizeDivisor % factor == 0;
}
/// Returns the alignment to be used for the allocation call itself.
/// aligned_alloc requires the alignment to be a power of two, and the
/// allocation size to be a multiple of the alignment.
int64_t getAllocationAlignment(AllocOp allocOp) const {
if (Optional<uint64_t> alignment = allocOp.alignment())
return *alignment;
// Whenever we don't have alignment set, we will use an alignment
// consistent with the element type; since the alignment has to be a
// power of two, we bump it to the next power of two if it is not one already.
auto eltSizeBytes = getMemRefEltSizeInBytes(allocOp.getType());
return std::max(kMinAlignedAllocAlignment,
llvm::PowerOf2Ceil(eltSizeBytes));
}
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value cumulativeSize,
Operation *op) const override {
// Heap allocations.
AllocOp allocOp = cast<AllocOp>(op);
MemRefType memRefType = allocOp.getType();
int64_t alignment = getAllocationAlignment(allocOp);
Value allocAlignment = createIndexConstant(rewriter, loc, alignment);
// aligned_alloc requires size to be a multiple of alignment; we will pad
// the size to the next multiple if necessary.
if (!isMemRefSizeMultipleOf(memRefType, alignment))
cumulativeSize =
createAligned(rewriter, loc, cumulativeSize, allocAlignment);
Type elementPtrType = this->getElementPtrType(memRefType);
Value allocatedPtr = createAllocCall(
loc, "aligned_alloc", elementPtrType, {allocAlignment, cumulativeSize},
allocOp.getParentOfType<ModuleOp>(), rewriter);
return std::make_tuple(allocatedPtr, allocatedPtr);
}
/// The minimum alignment to use with aligned_alloc (has to be a power of 2).
static constexpr uint64_t kMinAlignedAllocAlignment = 16UL;
};
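
aligned_alloc expects a power-of-two alignment and a size that is a multiple of it, which is why the pattern pads cumulativeSize with createAligned. A plain C++17 illustration of that contract, using the memref<100xf32> with alignment 32 case that also appears in the tests:

#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  std::size_t alignment = 32;                    // power of two, >= 16 here
  std::size_t sizeInBytes = 100 * sizeof(float); // 400, not a multiple of 32
  // Pad the size up to the next multiple of the alignment (416).
  std::size_t padded = (sizeInBytes + alignment - 1) / alignment * alignment;
  assert(padded % alignment == 0);
  void *buffer = std::aligned_alloc(alignment, padded);
  assert(reinterpret_cast<std::uintptr_t>(buffer) % alignment == 0);
  std::free(buffer);
  return 0;
}
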
struct AllocaOpLowering : public AllocLikeOpLowering {
AllocaOpLowering(LLVMTypeConverter &converter)
: AllocLikeOpLowering(AllocaOp::getOperationName(), converter) {}
/// Allocates the underlying buffer using the right call. `allocatedBytePtr`
/// is set to null for stack allocations. `accessAlignment` is set if
/// alignment is needed post allocation (for eg. in conjunction with malloc).
Value allocateBuffer(Location loc, Value cumulativeSize, Operation *op,
MemRefType memRefType, Value one, Value &accessAlignment,
Value &allocatedBytePtr,
ConversionPatternRewriter &rewriter) const {
auto elementPtrType = this->getElementPtrType(memRefType);
std::tuple<Value, Value> allocateBuffer(ConversionPatternRewriter &rewriter,
Location loc, Value cumulativeSize,
Operation *op) const override {
// With alloca, one gets a pointer to the element type right away.
// For stack allocations.
if (auto allocaOp = dyn_cast<AllocaOp>(op)) {
allocatedBytePtr = nullptr;
accessAlignment = nullptr;
return rewriter.create<LLVM::AllocaOp>(
loc, elementPtrType, cumulativeSize,
allocaOp.alignment() ? *allocaOp.alignment() : 0);
}
auto allocaOp = cast<AllocaOp>(op);
auto elementPtrType = this->getElementPtrType(allocaOp.getType());
// Heap allocations.
AllocOp allocOp = cast<AllocOp>(op);
auto allocatedElementPtr = rewriter.create<LLVM::AllocaOp>(
loc, elementPtrType, cumulativeSize,
allocaOp.alignment() ? *allocaOp.alignment() : 0);
Optional<int64_t> allocationAlignment = getAllocationAlignment(allocOp);
// Whether to use std lib function aligned_alloc that supports alignment.
bool useAlignedAlloc = allocationAlignment.hasValue();
// Insert the malloc/aligned_alloc declaration if it is not already present.
const auto *allocFuncName = useAlignedAlloc ? "aligned_alloc" : "malloc";
auto module = allocOp.getParentOfType<ModuleOp>();
auto allocFunc = module.lookupSymbol<LLVM::LLVMFuncOp>(allocFuncName);
if (!allocFunc) {
OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointToStart(
op->getParentOfType<ModuleOp>().getBody());
SmallVector<LLVM::LLVMType, 2> callArgTypes = {getIndexType()};
// aligned_alloc(size_t alignment, size_t size)
if (useAlignedAlloc)
callArgTypes.push_back(getIndexType());
allocFunc = rewriter.create<LLVM::LLVMFuncOp>(
rewriter.getUnknownLoc(), allocFuncName,
LLVM::LLVMType::getFunctionTy(getVoidPtrType(), callArgTypes,
/*isVarArg=*/false));
}
// Allocate the underlying buffer and store a pointer to it in the MemRef
// descriptor.
SmallVector<Value, 2> callArgs;
if (useAlignedAlloc) {
// Use aligned_alloc.
assert(allocationAlignment && "allocation alignment should be present");
auto alignedAllocAlignmentValue = rewriter.create<LLVM::ConstantOp>(
loc, typeConverter.convertType(rewriter.getIntegerType(64)),
rewriter.getI64IntegerAttr(allocationAlignment.getValue()));
// aligned_alloc requires size to be a multiple of alignment; we will pad
// the size to the next multiple if necessary.
if (!isMemRefSizeMultipleOf(memRefType, allocationAlignment.getValue())) {
Value bump = createBumpToAlign(loc, rewriter, cumulativeSize,
alignedAllocAlignmentValue);
cumulativeSize =
rewriter.create<LLVM::AddOp>(loc, cumulativeSize, bump);
}
callArgs = {alignedAllocAlignmentValue, cumulativeSize};
} else {
// Adjust the allocation size to consider alignment.
if (Optional<uint64_t> alignment = allocOp.alignment()) {
accessAlignment = createIndexConstant(rewriter, loc, *alignment);
} else if (!memRefType.getElementType().isSignlessIntOrIndexOrFloat()) {
// In the case where no alignment is specified, we may want to override
// `malloc's` behavior. `malloc` typically aligns at the size of the
// biggest scalar on a target HW. For non-scalars, use the natural
// alignment of the LLVM type given by the LLVM DataLayout.
accessAlignment =
this->getSizeInBytes(loc, memRefType.getElementType(), rewriter);
}
if (accessAlignment)
cumulativeSize =
rewriter.create<LLVM::AddOp>(loc, cumulativeSize, accessAlignment);
callArgs.push_back(cumulativeSize);
}
auto allocFuncSymbol = rewriter.getSymbolRefAttr(allocFunc);
allocatedBytePtr = rewriter
.create<LLVM::CallOp>(loc, getVoidPtrType(),
allocFuncSymbol, callArgs)
.getResult(0);
// For heap allocations, the allocated pointer is a cast of the byte pointer
// to the type pointer.
return rewriter.create<LLVM::BitcastOp>(loc, elementPtrType,
allocatedBytePtr);
return std::make_tuple(allocatedElementPtr, allocatedElementPtr);
}
// An `alloc` is converted into a definition of a memref descriptor value and
// a call to `malloc` to allocate the underlying data buffer. The memref
// descriptor is of the LLVM structure type where:
// 1. the first element is a pointer to the allocated (typed) data buffer,
// 2. the second element is a pointer to the (typed) payload, aligned to the
// specified alignment,
// 3. the remaining elements serve to store all the sizes and strides of the
// memref using LLVM-converted `index` type.
//
// Alignment is performed by allocating `alignment - 1` more bytes than
// requested and shifting the aligned pointer relative to the allocated
// memory. If alignment is unspecified, the two pointers are equal.
// An `alloca` is converted into a definition of a memref descriptor value and
// an llvm.alloca to allocate the underlying data buffer.
void rewrite(Operation *op, ArrayRef<Value> operands,
ConversionPatternRewriter &rewriter) const override {
MemRefType memRefType = cast<AllocLikeOp>(op).getType();
auto loc = op->getLoc();
// Get actual sizes of the memref as values: static sizes are constant
// values and dynamic sizes are passed to 'alloc' as operands. In case of
// zero-dimensional memref, assume a scalar (size 1).
SmallVector<Value, 4> sizes;
this->getMemRefDescriptorSizes(loc, memRefType, operands, rewriter, sizes);
Value cumulativeSize = this->getCumulativeSizeInBytes(
loc, memRefType.getElementType(), sizes, rewriter);
// Allocate the underlying buffer.
// Value holding the alignment that has to be performed post allocation
// (in conjunction with allocators that do not support alignment, eg.
// malloc); nullptr if no such adjustment needs to be performed.
Value accessAlignment;
// Byte pointer to the allocated buffer.
Value allocatedBytePtr;
Value allocatedTypePtr =
allocateBuffer(loc, cumulativeSize, op, memRefType,
createIndexConstant(rewriter, loc, 1), accessAlignment,
allocatedBytePtr, rewriter);
int64_t offset;
SmallVector<int64_t, 4> strides;
auto successStrides = getStridesAndOffset(memRefType, strides, offset);
(void)successStrides;
assert(succeeded(successStrides) && "unexpected non-strided memref");
assert(offset != MemRefType::getDynamicStrideOrOffset() &&
"unexpected dynamic offset");
// 0-D memref corner case: they have size 1.
assert(
((memRefType.getRank() == 0 && strides.empty() && sizes.size() == 1) ||
(strides.size() == sizes.size())) &&
"unexpected number of strides");
// Create the MemRef descriptor.
auto memRefDescriptor = createMemRefDescriptor(
loc, rewriter, memRefType, allocatedTypePtr, allocatedBytePtr,
accessAlignment, offset, strides, sizes);
// Return the final value of the descriptor.
rewriter.replaceOp(op, {memRefDescriptor});
}
protected:
/// The minimum alignment to use with aligned_alloc (has to be a power of 2).
uint64_t kMinAlignedAllocAlignment = 16UL;
};
struct AllocOpLowering : public AllocLikeOpLowering<AllocOp> {
explicit AllocOpLowering(LLVMTypeConverter &converter)
: AllocLikeOpLowering<AllocOp>(converter) {}
};
using AllocaOpLowering = AllocLikeOpLowering<AllocaOp>;
/// Copies the shaped descriptor part to (if `toDynamic` is set) or from
/// (otherwise) the dynamically allocated memory for any operands that were
/// unranked descriptors originally.
@@ -3200,12 +3224,13 @@ struct AssumeAlignmentOpLowering
// This relies on LLVM's CSE optimization (potentially after SROA), since
// after CSE all memref.alignedPtr instances get de-duplicated into the same
// pointer SSA value.
Value zero =
createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(), 0);
Value mask = createIndexAttrConstant(rewriter, op->getLoc(), getIndexType(),
auto intPtrType =
getIntPtrType(memRefDescriptor.getElementPtrType().getAddressSpace());
Value zero = createIndexAttrConstant(rewriter, op->getLoc(), intPtrType, 0);
Value mask = createIndexAttrConstant(rewriter, op->getLoc(), intPtrType,
alignment - 1);
Value ptrValue =
rewriter.create<LLVM::PtrToIntOp>(op->getLoc(), getIndexType(), ptr);
rewriter.create<LLVM::PtrToIntOp>(op->getLoc(), intPtrType, ptr);
rewriter.create<LLVM::AssumeOp>(
op->getLoc(),
rewriter.create<LLVM::ICmpOp>(
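
The comparison fed into llvm.assume here expresses that the aligned pointer has no bits set below the alignment, i.e. (ptr & (alignment - 1)) == 0, now computed on an integer of pointer width instead of the index type. The same predicate in plain C++ (illustration only):

#include <cassert>
#include <cstdint>

// True if `ptr` is aligned to `alignment`, which must be a power of two.
bool isAligned(const void *ptr, std::uintptr_t alignment) {
  return (reinterpret_cast<std::uintptr_t>(ptr) & (alignment - 1)) == 0;
}

int main() {
  alignas(64) static char buffer[64];
  assert(isAligned(buffer, 64));
  assert(!isAligned(buffer + 1, 64));
  return 0;
}
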
@@ -3477,9 +3502,12 @@ void mlir::populateStdToLLVMMemoryConversionPatterns(
StoreOpLowering,
SubViewOpLowering,
TransposeOpLowering,
ViewOpLowering,
AllocOpLowering>(converter);
ViewOpLowering>(converter);
// clang-format on
if (converter.getOptions().useAlignedAlloc)
patterns.insert<AlignedAllocOpLowering>(converter);
else
patterns.insert<AllocOpLowering>(converter);
}
void mlir::populateStdToLLVMFuncOpConversionPattern(


@@ -36,7 +36,6 @@ func @mixed_alloc(%arg0: index, %arg1: index) -> memref<?x42x?xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<3 x i64>, array<3 x i64>)>
@@ -77,7 +76,6 @@ func @dynamic_alloc(%arg0: index, %arg1: index) -> memref<?x?xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[sz]], %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: llvm.call @malloc(%[[sz_bytes]]) : (!llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
@@ -107,7 +105,6 @@ func @dynamic_alloca(%arg0: index, %arg1: index) -> memref<?x?xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[sz_bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[sz_bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
// CHECK-NEXT: llvm.insertvalue %[[allocated]], %{{.*}}[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<2 x i64>, array<2 x i64>)>
@@ -153,8 +150,7 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
// ALIGNED-ALLOC-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// ALIGNED-ALLOC-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// ALIGNED-ALLOC-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
// ALIGNED-ALLOC-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64
// ALIGNED-ALLOC-NEXT: %[[alignment:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: %[[allocated:.*]] = llvm.call @aligned_alloc(%[[alignment]], %[[bytes]]) : (!llvm.i64, !llvm.i64) -> !llvm.ptr<i8>
// ALIGNED-ALLOC-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<float>
%0 = alloc() {alignment = 32} : memref<32x18xf32>
@@ -164,26 +160,27 @@ func @stdlib_aligned_alloc(%N : index) -> memref<32x18xf32> {
%1 = alloc() {alignment = 64} : memref<4096xf32>
// Alignment is to element type boundaries (minimum 16 bytes).
// ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64
// ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]]
%2 = alloc() : memref<4096xvector<8xf32>>
// The minimum alignment is 16 bytes unless explicitly specified.
// ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : i64) : !llvm.i64
// ALIGNED-ALLOC: %[[c16:.*]] = llvm.mlir.constant(16 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c16]],
%3 = alloc() : memref<4096xvector<2xf32>>
// ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : i64) : !llvm.i64
// ALIGNED-ALLOC: %[[c8:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c8]],
%4 = alloc() {alignment = 8} : memref<1024xvector<4xf32>>
// Bump the memref allocation size if its size is not a multiple of alignment.
// ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : i64) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.urem
// ALIGNED-ALLOC: %[[c32:.*]] = llvm.mlir.constant(32 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.mlir.constant(1 : index) : !llvm.i64
// ALIGNED-ALLOC-NEXT: llvm.sub
// ALIGNED-ALLOC-NEXT: llvm.add
// ALIGNED-ALLOC-NEXT: llvm.urem
// ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.add
// ALIGNED-ALLOC-NEXT: %[[SIZE_ALIGNED:.*]] = llvm.sub
// ALIGNED-ALLOC-NEXT: llvm.call @aligned_alloc(%[[c32]], %[[SIZE_ALIGNED]])
%5 = alloc() {alignment = 32} : memref<100xf32>
// Bump alignment to the next power of two if it isn't.
// ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : i64) : !llvm.i64
// ALIGNED-ALLOC: %[[c128:.*]] = llvm.mlir.constant(128 : index) : !llvm.i64
// ALIGNED-ALLOC: llvm.call @aligned_alloc(%[[c128]]
%6 = alloc(%N) : memref<?xvector<18xf32>>
return %0 : memref<32x18xf32>


@@ -76,7 +76,6 @@ func @zero_d_alloc() -> memref<f32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64)>
@@ -91,7 +90,6 @@ func @zero_d_alloc() -> memref<f32> {
// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64
// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// BAREPTR-NEXT: llvm.call @malloc(%{{.*}}) : (!llvm.i64) -> !llvm.ptr<i8>
// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64)>
@@ -130,19 +128,19 @@ func @aligned_1d_alloc() -> memref<42xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64
// CHECK-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64
// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_1]] : !llvm.i64
// CHECK-NEXT: %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : !llvm.i64
// CHECK-NEXT: %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : !llvm.i64
// CHECK-NEXT: %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : !llvm.i64
// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : !llvm.i64 to !llvm.ptr<float>
// CHECK-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr<i8> to !llvm.i64
// CHECK-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64
// CHECK-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64
// CHECK-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64
// CHECK-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr<i8>, !llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr<i8> to !llvm.ptr<float>
// CHECK-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// CHECK-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
// CHECK-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
@@ -153,19 +151,19 @@ func @aligned_1d_alloc() -> memref<42xf32> {
// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// BAREPTR-NEXT: llvm.mul %{{.*}}, %[[sizeof]] : !llvm.i64
// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// BAREPTR-NEXT: %[[alignment:.*]] = llvm.mlir.constant(8 : index) : !llvm.i64
// BAREPTR-NEXT: %[[allocsize:.*]] = llvm.add {{.*}}, %[[alignment]] : !llvm.i64
// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[allocsize]]) : (!llvm.i64) -> !llvm.ptr<i8>
// BAREPTR-NEXT: %[[ptr:.*]] = llvm.bitcast %{{.*}} : !llvm.ptr<i8> to !llvm.ptr<float>
// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[ptr]] : !llvm.ptr<float> to !llvm.i64
// BAREPTR-NEXT: %[[one_2:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// BAREPTR-NEXT: %[[bump:.*]] = llvm.sub %[[alignment]], %[[one_2]] : !llvm.i64
// BAREPTR-NEXT: %[[bumped:.*]] = llvm.add %[[allocatedAsInt]], %[[bump]] : !llvm.i64
// BAREPTR-NEXT: %[[mod:.*]] = llvm.urem %[[bumped]], %[[alignment]] : !llvm.i64
// BAREPTR-NEXT: %[[aligned:.*]] = llvm.sub %[[bumped]], %[[mod]] : !llvm.i64
// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.inttoptr %[[aligned]] : !llvm.i64 to !llvm.ptr<float>
// BAREPTR-NEXT: llvm.mlir.undef : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// BAREPTR-NEXT: llvm.insertvalue %[[ptr]], %{{.*}}[0] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// BAREPTR-NEXT: %[[allocatedAsInt:.*]] = llvm.ptrtoint %[[allocated]] : !llvm.ptr<i8> to !llvm.i64
// BAREPTR-NEXT: %[[alignAdj1:.*]] = llvm.urem %[[allocatedAsInt]], %[[alignment]] : !llvm.i64
// BAREPTR-NEXT: %[[alignAdj2:.*]] = llvm.sub %[[alignment]], %[[alignAdj1]] : !llvm.i64
// BAREPTR-NEXT: %[[alignAdj3:.*]] = llvm.urem %[[alignAdj2]], %[[alignment]] : !llvm.i64
// BAREPTR-NEXT: %[[aligned:.*]] = llvm.getelementptr %[[allocated]][%[[alignAdj3]]] : (!llvm.ptr<i8>, !llvm.i64) -> !llvm.ptr<i8>
// BAREPTR-NEXT: %[[alignedBitCast:.*]] = llvm.bitcast %[[aligned]] : !llvm.ptr<i8> to !llvm.ptr<float>
// BAREPTR-NEXT: llvm.insertvalue %[[alignedBitCast]], %{{.*}}[1] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
// BAREPTR-NEXT: %[[c0:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64
// BAREPTR-NEXT: llvm.insertvalue %[[c0]], %{{.*}}[2] : !llvm.struct<(ptr<float>, ptr<float>, i64, array<1 x i64>, array<1 x i64>)>
@@ -186,7 +184,6 @@ func @static_alloc() -> memref<32x18xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr<i8>
// CHECK-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<float>
@@ -198,7 +195,6 @@ func @static_alloc() -> memref<32x18xf32> {
// BAREPTR-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// BAREPTR-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// BAREPTR-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
// BAREPTR-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// BAREPTR-NEXT: %[[allocated:.*]] = llvm.call @malloc(%[[bytes]]) : (!llvm.i64) -> !llvm.ptr<i8>
// BAREPTR-NEXT: llvm.bitcast %[[allocated]] : !llvm.ptr<i8> to !llvm.ptr<float>
%0 = alloc() : memref<32x18xf32>
@@ -217,7 +213,6 @@ func @static_alloca() -> memref<32x18xf32> {
// CHECK-NEXT: %[[gep:.*]] = llvm.getelementptr %[[null]][%[[one]]] : (!llvm.ptr<float>, !llvm.i64) -> !llvm.ptr<float>
// CHECK-NEXT: %[[sizeof:.*]] = llvm.ptrtoint %[[gep]] : !llvm.ptr<float> to !llvm.i64
// CHECK-NEXT: %[[bytes:.*]] = llvm.mul %[[num_elems]], %[[sizeof]] : !llvm.i64
// CHECK-NEXT: %[[one_1:.*]] = llvm.mlir.constant(1 : index) : !llvm.i64
// CHECK-NEXT: %[[allocated:.*]] = llvm.alloca %[[bytes]] x !llvm.float : (!llvm.i64) -> !llvm.ptr<float>
%0 = alloca() : memref<32x18xf32>