mirror of
https://github.com/intel/llvm.git
synced 2026-01-24 17:01:00 +08:00
[AMDGPU] Deduce attributes with the Attributor
This patch introduces a pass that uses the Attributor to deduce AMDGPU specific attributes. Reviewed By: jdoerfert, arsenm Differential Revision: https://reviews.llvm.org/D104997
This commit is contained in:
@@ -1593,6 +1593,13 @@ public:
|
||||
bool CheckBBLivenessOnly = false,
|
||||
DepClassTy DepClass = DepClassTy::OPTIONAL);
|
||||
|
||||
/// Return true if \p BB is assumed dead.
|
||||
///
|
||||
/// If \p LivenessAA is not provided it is queried.
|
||||
bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
|
||||
const AAIsDead *FnLivenessAA,
|
||||
DepClassTy DepClass = DepClassTy::OPTIONAL);
|
||||
|
||||
/// Check \p Pred on all (transitive) uses of \p V.
|
||||
///
|
||||
/// This method will evaluate \p Pred on all (transitive) uses of the
|
||||
@@ -2470,7 +2477,8 @@ struct IntegerRangeState : public AbstractState {
|
||||
/// IRAttribute::manifest is defined in the Attributor.cpp.
|
||||
struct IRAttributeManifest {
|
||||
static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
|
||||
const ArrayRef<Attribute> &DeducedAttrs);
|
||||
const ArrayRef<Attribute> &DeducedAttrs,
|
||||
bool ForceReplace = false);
|
||||
};
|
||||
|
||||
/// Helper to tie a abstract state implementation to an abstract attribute.
|
||||
@@ -2696,6 +2704,17 @@ struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
|
||||
Pass *createAttributorLegacyPass();
|
||||
Pass *createAttributorCGSCCLegacyPass();
|
||||
|
||||
/// Helper function to clamp a state \p S of type \p StateType with the
|
||||
/// information in \p R and indicate/return if \p S did change (as-in update is
|
||||
/// required to be run again).
|
||||
template <typename StateType>
|
||||
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
|
||||
auto Assumed = S.getAssumed();
|
||||
S ^= R;
|
||||
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
|
||||
: ChangeStatus::CHANGED;
|
||||
}
|
||||
|
||||
/// ----------------------------------------------------------------------------
|
||||
/// Abstract Attribute Classes
|
||||
/// ----------------------------------------------------------------------------
|
||||
|
||||
@@ -97,6 +97,8 @@ extern char &AMDGPUMachineCFGStructurizerID;
|
||||
void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
|
||||
|
||||
Pass *createAMDGPUAnnotateKernelFeaturesPass();
|
||||
Pass *createAMDGPUAttributorPass();
|
||||
void initializeAMDGPUAttributorPass(PassRegistry &);
|
||||
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
|
||||
extern char &AMDGPUAnnotateKernelFeaturesID;
|
||||
|
||||
|
||||
529
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Normal file
529
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
Normal file
@@ -0,0 +1,529 @@
|
||||
//===- AMDGPUAttributor.cpp -----------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
/// \file This pass uses Attributor framework to deduce AMDGPU attributes.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "AMDGPU.h"
|
||||
#include "GCNSubtarget.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/IR/IntrinsicsAMDGPU.h"
|
||||
#include "llvm/IR/IntrinsicsR600.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Transforms/IPO/Attributor.h"
|
||||
|
||||
#define DEBUG_TYPE "amdgpu-attributor"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// The AMDGPU implicit-input function attributes this pass deduces and
/// propagates from callees/callers. Used as the iteration set wherever "all
/// implicit attributes" must be assumed (e.g. unknown or indirect callees).
static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x", "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z", "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr", "amdgpu-dispatch-id",
    "amdgpu-queue-ptr", "amdgpu-implicitarg-ptr"};
|
||||
|
||||
// We do not need to note the x workitem or workgroup id because they are always
// initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.

/// Map intrinsic \p ID to the implicit-input attribute name it requires, or
/// the empty string if it requires none. \p NonKernelOnly is set to true for
/// inputs that are always initialized in kernels, so the attribute only
/// matters on non-kernel functions. \p IsQueuePtr is set to true when the
/// intrinsic additionally implies a queue-pointer requirement. Both out-params
/// are only ever set, never cleared, so callers can accumulate across calls.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
|
||||
|
||||
static bool castRequiresQueuePtr(unsigned SrcAS) {
|
||||
return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
|
||||
}
|
||||
|
||||
static bool isDSAddress(const Constant *C) {
|
||||
const GlobalValue *GV = dyn_cast<GlobalValue>(C);
|
||||
if (!GV)
|
||||
return false;
|
||||
unsigned AS = GV->getAddressSpace();
|
||||
return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
|
||||
}
|
||||
|
||||
class AMDGPUInformationCache : public InformationCache {
|
||||
public:
|
||||
AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
|
||||
BumpPtrAllocator &Allocator,
|
||||
SetVector<Function *> *CGSCC, TargetMachine &TM)
|
||||
: InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
|
||||
TargetMachine &TM;
|
||||
|
||||
enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
|
||||
|
||||
/// Check if the subtarget has aperture regs.
|
||||
bool hasApertureRegs(Function &F) {
|
||||
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
|
||||
return ST.hasApertureRegs();
|
||||
}
|
||||
|
||||
private:
|
||||
/// Check if the ConstantExpr \p CE requires queue ptr attribute.
|
||||
static bool visitConstExpr(const ConstantExpr *CE) {
|
||||
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
|
||||
unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
|
||||
return castRequiresQueuePtr(SrcAS);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Get the constant access bitmap for \p C.
|
||||
uint8_t getConstantAccess(const Constant *C) {
|
||||
auto It = ConstantStatus.find(C);
|
||||
if (It != ConstantStatus.end())
|
||||
return It->second;
|
||||
|
||||
uint8_t Result = 0;
|
||||
if (isDSAddress(C))
|
||||
Result = DS_GLOBAL;
|
||||
|
||||
if (const auto *CE = dyn_cast<ConstantExpr>(C))
|
||||
if (visitConstExpr(CE))
|
||||
Result |= ADDR_SPACE_CAST;
|
||||
|
||||
for (const Use &U : C->operands()) {
|
||||
const auto *OpC = dyn_cast<Constant>(U);
|
||||
if (!OpC)
|
||||
continue;
|
||||
|
||||
Result |= getConstantAccess(OpC);
|
||||
}
|
||||
return Result;
|
||||
}
|
||||
|
||||
public:
|
||||
/// Returns true if \p Fn needs a queue ptr attribute because of \p C.
|
||||
bool needsQueuePtr(const Constant *C, Function &Fn) {
|
||||
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
|
||||
bool HasAperture = hasApertureRegs(Fn);
|
||||
|
||||
// No need to explore the constants.
|
||||
if (!IsNonEntryFunc && HasAperture)
|
||||
return false;
|
||||
|
||||
uint8_t Access = getConstantAccess(C);
|
||||
|
||||
// We need to trap on DS globals in non-entry functions.
|
||||
if (IsNonEntryFunc && (Access & DS_GLOBAL))
|
||||
return true;
|
||||
|
||||
return !HasAperture && (Access & ADDR_SPACE_CAST);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Used to determine if the Constant needs a queue ptr attribute.
|
||||
DenseMap<const Constant *, uint8_t> ConstantStatus;
|
||||
};
|
||||
|
||||
/// Abstract attribute interface that collects the set of AMDGPU
/// implicit-input attribute names a function requires. The BooleanState
/// wrapper tracks whether the deduction is still valid (optimistic) or has
/// hit a pessimistic fixpoint.
struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Return the set of implicit-input attribute names deduced so far for the
  /// associated function.
  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;
|
||||
|
||||
/// Abstract attribute interface for deducing the "uniform-work-group-size"
/// attribute. The boolean state is the assumed uniform-work-group-size value.
struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
|
||||
|
||||
/// Function-position implementation of AAAMDWorkGroupSize. Kernels are fixed
/// points seeded from their existing attribute; other functions clamp their
/// state against all callers, so a callee is only "uniform" when every caller
/// is.
struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    // Non-kernel functions start optimistic and are refined in updateImpl.
    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    // For kernels the attribute on the function itself is authoritative;
    // fix the state immediately so it only propagates downward.
    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    // Intersect (clamp) our assumed state with each caller's state.
    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << F->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    // If not all call sites are visible we cannot trust the clamped result.
    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    // ForceReplace: overwrite any pre-existing attribute value rather than
    // keeping the old one.
    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the state is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};
|
||||
|
||||
/// Factory: AAAMDWorkGroupSize only supports function positions; any other
/// position kind is a programming error.
AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() != IRPosition::IRP_FUNCTION)
    llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
  return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
}
|
||||
|
||||
/// Function-position implementation of AAAMDAttributes. Deduces the
/// implicit-input attributes a function needs by inspecting intrinsic
/// callees, propagating from regular callees, and scanning the body for
/// allocas, address-space casts, and constants that require the queue ptr.
struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    // amdgpu_gfx is the calling convention that does not support all of the
    // implicit inputs.
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Don't add attributes to intrinsics
    if (F->isIntrinsic()) {
      indicatePessimisticFixpoint();
      return;
    }

    // Ignore functions with graphics calling conventions, these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    // Seed the set with attributes already present on the function.
    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // Address-taken functions may be reached indirectly, so conservatively
    // assume they need every implicit input.
    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    // Record a newly required attribute and note whether that changed the
    // state (drives another Attributor iteration).
    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as a unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        // kernarg-segment-ptr is only meaningful on entry functions.
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    // Returning false stops the instruction walk after the first alloca —
    // one is enough to establish amdgpu-stack-objects.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, try it first.

    // amdgpu-queue-ptr is not needed if aperture regs is present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    // Fall back to a full scan of all constant operands; needsQueuePtr
    // walks constants recursively (memoized in the info cache).
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    // ForceReplace: overwrite any pre-existing attribute values.
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  /// The implicit-input attribute names deduced so far for this function.
  DenseSet<StringRef> Attributes;
};
|
||||
|
||||
/// Factory: AAAMDAttributes only supports function positions; any other
/// position kind is a programming error.
AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() != IRPosition::IRP_FUNCTION)
    llvm_unreachable("AAAMDAttributes is only valid for function position");
  return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
}
|
||||
|
||||
/// Legacy module pass driving the Attributor with the AMDGPU-specific
/// abstract attributes (AAAMDAttributes, AAAMDWorkGroupSize) over every
/// function in the module.
class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M)
      Functions.insert(&F);

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    // Seed both AMDGPU abstract attributes for every function; A.run()
    // iterates them to a fixpoint and manifests the results on the IR.
    for (Function &F : M) {
      A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
      A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  // Cached in doInitialization; required for subtarget queries.
  TargetMachine *TM;
  static char ID;
};
|
||||
|
||||
char AMDGPUAttributor::ID = 0;

/// Factory used by the AMDGPU target setup to create the legacy pass.
Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)
|
||||
@@ -350,6 +350,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
|
||||
initializeSILoadStoreOptimizerPass(*PR);
|
||||
initializeAMDGPUFixFunctionBitcastsPass(*PR);
|
||||
initializeAMDGPUAlwaysInlinePass(*PR);
|
||||
initializeAMDGPUAttributorPass(*PR);
|
||||
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
|
||||
initializeAMDGPUAnnotateUniformValuesPass(*PR);
|
||||
initializeAMDGPUArgumentUsageInfoPass(*PR);
|
||||
|
||||
@@ -44,6 +44,7 @@ add_llvm_target(AMDGPUCodeGen
|
||||
AMDGPUAliasAnalysis.cpp
|
||||
AMDGPUAlwaysInlinePass.cpp
|
||||
AMDGPUAnnotateKernelFeatures.cpp
|
||||
AMDGPUAttributor.cpp
|
||||
AMDGPUAnnotateUniformValues.cpp
|
||||
AMDGPUArgumentUsageInfo.cpp
|
||||
AMDGPUAsmPrinter.cpp
|
||||
|
||||
@@ -293,12 +293,14 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
|
||||
/// attribute list \p Attrs. This is only the case if it was not already present
|
||||
/// in \p Attrs at the position describe by \p PK and \p AttrIdx.
|
||||
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
|
||||
AttributeList &Attrs, int AttrIdx) {
|
||||
AttributeList &Attrs, int AttrIdx,
|
||||
bool ForceReplace = false) {
|
||||
|
||||
if (Attr.isEnumAttribute()) {
|
||||
Attribute::AttrKind Kind = Attr.getKindAsEnum();
|
||||
if (Attrs.hasAttribute(AttrIdx, Kind))
|
||||
if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
if (!ForceReplace &&
|
||||
isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
return false;
|
||||
Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
|
||||
return true;
|
||||
@@ -306,7 +308,8 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
|
||||
if (Attr.isStringAttribute()) {
|
||||
StringRef Kind = Attr.getKindAsString();
|
||||
if (Attrs.hasAttribute(AttrIdx, Kind))
|
||||
if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
if (!ForceReplace &&
|
||||
isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
return false;
|
||||
Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
|
||||
return true;
|
||||
@@ -314,7 +317,8 @@ static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
|
||||
if (Attr.isIntAttribute()) {
|
||||
Attribute::AttrKind Kind = Attr.getKindAsEnum();
|
||||
if (Attrs.hasAttribute(AttrIdx, Kind))
|
||||
if (isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
if (!ForceReplace &&
|
||||
isEqualOrWorse(Attr, Attrs.getAttribute(AttrIdx, Kind)))
|
||||
return false;
|
||||
Attrs = Attrs.removeAttribute(Ctx, AttrIdx, Kind);
|
||||
Attrs = Attrs.addAttribute(Ctx, AttrIdx, Attr);
|
||||
@@ -395,7 +399,8 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
|
||||
|
||||
ChangeStatus
|
||||
IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
|
||||
const ArrayRef<Attribute> &DeducedAttrs) {
|
||||
const ArrayRef<Attribute> &DeducedAttrs,
|
||||
bool ForceReplace) {
|
||||
Function *ScopeFn = IRP.getAnchorScope();
|
||||
IRPosition::Kind PK = IRP.getPositionKind();
|
||||
|
||||
@@ -423,7 +428,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
|
||||
ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
|
||||
LLVMContext &Ctx = IRP.getAnchorValue().getContext();
|
||||
for (const Attribute &Attr : DeducedAttrs) {
|
||||
if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx()))
|
||||
if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx(), ForceReplace))
|
||||
continue;
|
||||
|
||||
HasChanged = ChangeStatus::CHANGED;
|
||||
@@ -894,6 +899,22 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Attributor::isAssumedDead(const BasicBlock &BB,
|
||||
const AbstractAttribute *QueryingAA,
|
||||
const AAIsDead *FnLivenessAA,
|
||||
DepClassTy DepClass) {
|
||||
if (!FnLivenessAA)
|
||||
FnLivenessAA = lookupAAFor<AAIsDead>(IRPosition::function(*BB.getParent()),
|
||||
QueryingAA, DepClassTy::NONE);
|
||||
if (FnLivenessAA->isAssumedDead(&BB)) {
|
||||
if (QueryingAA)
|
||||
recordDependence(*FnLivenessAA, *QueryingAA, DepClass);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
|
||||
const AbstractAttribute &QueryingAA,
|
||||
const Value &V, bool CheckBBLivenessOnly,
|
||||
@@ -2213,6 +2234,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
|
||||
// The alignment of a pointer is interesting for loads.
|
||||
case Instruction::Store:
|
||||
// The alignment of a pointer is interesting for stores.
|
||||
case Instruction::Alloca:
|
||||
case Instruction::AddrSpaceCast:
|
||||
IsInterestingOpcode = true;
|
||||
}
|
||||
if (IsInterestingOpcode) {
|
||||
|
||||
@@ -146,6 +146,16 @@ PIPE_OPERATOR(AAFunctionReachability)
|
||||
PIPE_OPERATOR(AAPointerInfo)
|
||||
|
||||
#undef PIPE_OPERATOR
|
||||
|
||||
template <>
|
||||
ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
|
||||
const DerefState &R) {
|
||||
ChangeStatus CS0 =
|
||||
clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
|
||||
ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
|
||||
return CS0 | CS1;
|
||||
}
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
/// Get pointer operand of memory accessing instruction. If \p I is
|
||||
@@ -448,17 +458,6 @@ getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
|
||||
AllowNonInbounds);
|
||||
}
|
||||
|
||||
/// Helper function to clamp a state \p S of type \p StateType with the
|
||||
/// information in \p R and indicate/return if \p S did change (as-in update is
|
||||
/// required to be run again).
|
||||
template <typename StateType>
|
||||
ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
|
||||
auto Assumed = S.getAssumed();
|
||||
S ^= R;
|
||||
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
|
||||
: ChangeStatus::CHANGED;
|
||||
}
|
||||
|
||||
/// Clamp the information known for all returned values of a function
|
||||
/// (identified by \p QueryingAA) into \p S.
|
||||
template <typename AAType, typename StateType = typename AAType::StateType>
|
||||
@@ -3942,15 +3941,6 @@ struct AAIsDeadCallSite final : AAIsDeadFunction {
|
||||
|
||||
/// -------------------- Dereferenceable Argument Attribute --------------------
|
||||
|
||||
template <>
|
||||
ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
|
||||
const DerefState &R) {
|
||||
ChangeStatus CS0 =
|
||||
clampStateAndIndicateChange(S.DerefBytesState, R.DerefBytesState);
|
||||
ChangeStatus CS1 = clampStateAndIndicateChange(S.GlobalState, R.GlobalState);
|
||||
return CS0 | CS1;
|
||||
}
|
||||
|
||||
struct AADereferenceableImpl : AADereferenceable {
|
||||
AADereferenceableImpl(const IRPosition &IRP, Attributor &A)
|
||||
: AADereferenceable(IRP, A) {}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
|
||||
|
||||
declare void @llvm.memcpy.p1i32.p4i32.i32(i32 addrspace(1)* nocapture, i32 addrspace(4)* nocapture, i32, i1) #0
|
||||
|
||||
@@ -167,7 +168,11 @@ define i32 addrspace(3)* @ret_constant_cast_group_gv_gep_to_flat_to_group() #1 {
|
||||
attributes #0 = { argmemonly nounwind }
|
||||
attributes #1 = { nounwind }
|
||||
;.
|
||||
; HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
|
||||
; HSA: attributes #[[ATTR1]] = { nounwind }
|
||||
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
|
||||
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree nounwind willreturn "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
|
||||
|
||||
declare i32 @llvm.amdgcn.workgroup.id.x() #0
|
||||
declare i32 @llvm.amdgcn.workgroup.id.y() #0
|
||||
@@ -140,152 +141,228 @@ define void @use_workgroup_id_y_workgroup_id_z() #1 {
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workitem_id_x() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
|
||||
; HSA-SAME: () #[[ATTR1]] {
|
||||
; HSA-NEXT: call void @use_workitem_id_x()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: call void @use_workitem_id_x()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_x
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR22:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workitem_id_x()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @kernel_indirect_use_workitem_id_x() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
|
||||
; HSA-SAME: () #[[ATTR1]] {
|
||||
; HSA-NEXT: call void @use_workitem_id_x()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
|
||||
; AKF_HSA-SAME: () #[[ATTR1]] {
|
||||
; AKF_HSA-NEXT: call void @use_workitem_id_x()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workitem_id_x
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR1]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_x() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workitem_id_x()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workitem_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
|
||||
; HSA-SAME: () #[[ATTR2]] {
|
||||
; HSA-NEXT: call void @use_workitem_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-NEXT: call void @use_workitem_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_y() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workitem_id_y()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workitem_id_z() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
|
||||
; HSA-SAME: () #[[ATTR3]] {
|
||||
; HSA-NEXT: call void @use_workitem_id_z()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
|
||||
; AKF_HSA-SAME: () #[[ATTR3]] {
|
||||
; AKF_HSA-NEXT: call void @use_workitem_id_z()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workitem_id_z
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR3]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workitem_id_z() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workitem_id_z()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workgroup_id_x() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
|
||||
; HSA-SAME: () #[[ATTR4]] {
|
||||
; HSA-NEXT: call void @use_workgroup_id_x()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-NEXT: call void @use_workgroup_id_x()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_x
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workgroup_id_x()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @kernel_indirect_use_workgroup_id_x() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
|
||||
; HSA-SAME: () #[[ATTR4]] {
|
||||
; HSA-NEXT: call void @use_workgroup_id_x()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
|
||||
; AKF_HSA-SAME: () #[[ATTR4]] {
|
||||
; AKF_HSA-NEXT: call void @use_workgroup_id_x()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kernel_indirect_use_workgroup_id_x
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR4]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_x() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workgroup_id_x()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workgroup_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
|
||||
; HSA-SAME: () #[[ATTR5]] {
|
||||
; HSA-NEXT: call void @use_workgroup_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR5]] {
|
||||
; AKF_HSA-NEXT: call void @use_workgroup_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_y() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workgroup_id_y()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workgroup_id_z() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
|
||||
; HSA-SAME: () #[[ATTR6]] {
|
||||
; HSA-NEXT: call void @use_workgroup_id_z()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
|
||||
; AKF_HSA-SAME: () #[[ATTR6]] {
|
||||
; AKF_HSA-NEXT: call void @use_workgroup_id_z()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_z
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR6]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_workgroup_id_z() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_workgroup_id_z()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_indirect_use_workgroup_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
|
||||
; HSA-SAME: () #[[ATTR5]] {
|
||||
; HSA-NEXT: call void @func_indirect_use_workgroup_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR5]] {
|
||||
; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_indirect_use_workgroup_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_workgroup_id_y() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @func_indirect_use_workgroup_id_y()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @indirect_x2_use_workgroup_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
|
||||
; HSA-SAME: () #[[ATTR5]] {
|
||||
; HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR5]] {
|
||||
; AKF_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_x2_use_workgroup_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR5]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_indirect_use_workgroup_id_y() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @func_indirect_indirect_use_workgroup_id_y()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_dispatch_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
|
||||
; HSA-SAME: () #[[ATTR7]] {
|
||||
; HSA-NEXT: call void @use_dispatch_ptr()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR7]] {
|
||||
; AKF_HSA-NEXT: call void @use_dispatch_ptr()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR7]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_ptr() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_dispatch_ptr()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_queue_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
|
||||
; HSA-SAME: () #[[ATTR8]] {
|
||||
; HSA-NEXT: call void @use_queue_ptr()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR8]] {
|
||||
; AKF_HSA-NEXT: call void @use_queue_ptr()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_queue_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_queue_ptr() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_queue_ptr()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_dispatch_id() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
|
||||
; HSA-SAME: () #[[ATTR9]] {
|
||||
; HSA-NEXT: call void @use_dispatch_id()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
|
||||
; AKF_HSA-SAME: () #[[ATTR9]] {
|
||||
; AKF_HSA-NEXT: call void @use_dispatch_id()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_id
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR9]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_dispatch_id() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_dispatch_id()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_workgroup_id_y_workgroup_id_z() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
|
||||
; HSA-SAME: () #[[ATTR11:[0-9]+]] {
|
||||
; HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
|
||||
; AKF_HSA-SAME: () #[[ATTR11:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_workgroup_id_y_workgroup_id_z
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR11:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: unreachable
|
||||
;
|
||||
call void @func_indirect_use_workgroup_id_y_workgroup_id_z()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @recursive_use_workitem_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
|
||||
; HSA-SAME: () #[[ATTR2]] {
|
||||
; HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
; HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
|
||||
; HSA-NEXT: call void @recursive_use_workitem_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
; AKF_HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
|
||||
; AKF_HSA-NEXT: call void @recursive_use_workitem_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@recursive_use_workitem_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR12:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i32 [[VAL]], i32 addrspace(1)* undef, align 4
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR23:[0-9]+]]
|
||||
; ATTRIBUTOR_HSA-NEXT: unreachable
|
||||
;
|
||||
%val = call i32 @llvm.amdgcn.workitem.id.y()
|
||||
store volatile i32 %val, i32 addrspace(1)* undef
|
||||
@@ -294,10 +371,15 @@ define void @recursive_use_workitem_id_y() #1 {
|
||||
}
|
||||
|
||||
define void @call_recursive_use_workitem_id_y() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
|
||||
; HSA-SAME: () #[[ATTR2]] {
|
||||
; HSA-NEXT: call void @recursive_use_workitem_id_y()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
|
||||
; AKF_HSA-SAME: () #[[ATTR2]] {
|
||||
; AKF_HSA-NEXT: call void @recursive_use_workitem_id_y()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@call_recursive_use_workitem_id_y
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR2]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @recursive_use_workitem_id_y() #[[ATTR23]]
|
||||
; ATTRIBUTOR_HSA-NEXT: unreachable
|
||||
;
|
||||
call void @recursive_use_workitem_id_y()
|
||||
ret void
|
||||
@@ -315,12 +397,19 @@ define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 {
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
|
||||
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
|
||||
; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR12:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_gfx9
|
||||
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 0, i32 addrspace(4)* %stof
|
||||
@@ -328,12 +417,19 @@ define void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
}
|
||||
|
||||
define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %ptr) #2 {
|
||||
; HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
|
||||
; HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; HSA-NEXT: call void @func_indirect_use_queue_ptr()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; AKF_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR13:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; AKF_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; AKF_HSA-NEXT: call void @func_indirect_use_queue_ptr()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; ATTRIBUTOR_HSA-SAME: (i32 addrspace(3)* [[PTR:%.*]]) #[[ATTR14:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[STOF:%.*]] = addrspacecast i32 addrspace(3)* [[PTR]] to i32 addrspace(4)*
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i32 0, i32 addrspace(4)* [[STOF]], align 4
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @func_indirect_use_queue_ptr() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
|
||||
store volatile i32 0, i32 addrspace(4)* %stof
|
||||
@@ -342,63 +438,94 @@ define void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* %p
|
||||
}
|
||||
|
||||
define void @indirect_use_group_to_flat_addrspacecast() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
|
||||
; HSA-SAME: () #[[ATTR8]] {
|
||||
; HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
|
||||
; AKF_HSA-SAME: () #[[ATTR8]] {
|
||||
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null) #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast(i32 addrspace(3)* null)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @indirect_use_group_to_flat_addrspacecast_gfx9() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
|
||||
; HSA-SAME: () #[[ATTR11]] {
|
||||
; HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
|
||||
; AKF_HSA-SAME: () #[[ATTR11]] {
|
||||
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_gfx9
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR15:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null) #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast_gfx9(i32 addrspace(3)* null)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; HSA-SAME: () #[[ATTR8]] {
|
||||
; HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; AKF_HSA-SAME: () #[[ATTR8]] {
|
||||
; AKF_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@indirect_use_group_to_flat_addrspacecast_queue_ptr_gfx9
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR8]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null) #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_group_to_flat_addrspacecast_queue_ptr_gfx9(i32 addrspace(3)* null)
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @use_kernarg_segment_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
|
||||
; HSA-SAME: () #[[ATTR14:[0-9]+]] {
|
||||
; HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
; HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR14:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_kernarg_segment_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR16:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[KERNARG_SEGMENT_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[KERNARG_SEGMENT_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
%kernarg.segment.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
|
||||
store volatile i8 addrspace(4)* %kernarg.segment.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_indirect_use_kernarg_segment_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
|
||||
; HSA-SAME: () #[[ATTR11]] {
|
||||
; HSA-NEXT: call void @use_kernarg_segment_ptr()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR11]] {
|
||||
; AKF_HSA-NEXT: call void @use_kernarg_segment_ptr()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_kernarg_segment_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR15]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_kernarg_segment_ptr() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_kernarg_segment_ptr()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
|
||||
; HSA-SAME: () #[[ATTR15:[0-9]+]] {
|
||||
; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR15:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_use_implicitarg_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
@@ -406,11 +533,17 @@ define amdgpu_kernel void @kern_use_implicitarg_ptr() #1 {
|
||||
}
|
||||
|
||||
define void @use_implicitarg_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
|
||||
; HSA-SAME: () #[[ATTR16:[0-9]+]] {
|
||||
; HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR16:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_implicitarg_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[IMPLICITARG_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[IMPLICITARG_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
%implicitarg.ptr = call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
|
||||
store volatile i8 addrspace(4)* %implicitarg.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
@@ -418,10 +551,15 @@ define void @use_implicitarg_ptr() #1 {
|
||||
}
|
||||
|
||||
define void @func_indirect_use_implicitarg_ptr() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
|
||||
; HSA-SAME: () #[[ATTR16]] {
|
||||
; HSA-NEXT: call void @use_implicitarg_ptr()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
|
||||
; AKF_HSA-SAME: () #[[ATTR16]] {
|
||||
; AKF_HSA-NEXT: call void @use_implicitarg_ptr()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_implicitarg_ptr
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR17]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @use_implicitarg_ptr() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @use_implicitarg_ptr()
|
||||
ret void
|
||||
@@ -429,70 +567,99 @@ define void @func_indirect_use_implicitarg_ptr() #1 {
|
||||
|
||||
declare void @external.func() #3
|
||||
|
||||
; This function gets deleted.
|
||||
define internal void @defined.func() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@defined.func
|
||||
; HSA-SAME: () #[[ATTR17:[0-9]+]] {
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@defined.func
|
||||
; AKF_HSA-SAME: () #[[ATTR17:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_call_external() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_call_external
|
||||
; HSA-SAME: () #[[ATTR17]] {
|
||||
; HSA-NEXT: call void @external.func()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_external
|
||||
; AKF_HSA-SAME: () #[[ATTR17]] {
|
||||
; AKF_HSA-NEXT: call void @external.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_external
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR18:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @external.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_call_defined() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_call_defined
|
||||
; HSA-SAME: () #[[ATTR17]] {
|
||||
; HSA-NEXT: call void @defined.func()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_defined
|
||||
; AKF_HSA-SAME: () #[[ATTR17]] {
|
||||
; AKF_HSA-NEXT: call void @defined.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_defined
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @defined.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func_call_asm() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
; HSA-SAME: () #[[ATTR18:[0-9]+]] {
|
||||
; HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]]
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
; AKF_HSA-SAME: () #[[ATTR18:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR18]]
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_call_asm
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void asm sideeffect "", ""() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void asm sideeffect "", ""() #3
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kern_call_external() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@kern_call_external
|
||||
; HSA-SAME: () #[[ATTR19:[0-9]+]] {
|
||||
; HSA-NEXT: call void @external.func()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@kern_call_external
|
||||
; AKF_HSA-SAME: () #[[ATTR19:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: call void @external.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@kern_call_external
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR20:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: call void @external.func() #[[ATTR22]]
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @external.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @func_kern_defined() #3 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_kern_defined
|
||||
; HSA-SAME: () #[[ATTR19]] {
|
||||
; HSA-NEXT: call void @defined.func()
|
||||
; HSA-NEXT: ret void
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_kern_defined
|
||||
; AKF_HSA-SAME: () #[[ATTR19]] {
|
||||
; AKF_HSA-NEXT: call void @defined.func()
|
||||
; AKF_HSA-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_kern_defined
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR19]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: ret void
|
||||
;
|
||||
call void @defined.func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @use_dispatch_ptr_ret_type() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
|
||||
; HSA-SAME: () #[[ATTR20:[0-9]+]] {
|
||||
; HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; HSA-NEXT: ret i32 0
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
|
||||
; AKF_HSA-SAME: () #[[ATTR20:[0-9]+]] {
|
||||
; AKF_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; AKF_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; AKF_HSA-NEXT: ret i32 0
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@use_dispatch_ptr_ret_type
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR21:[0-9]+]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[DISPATCH_PTR:%.*]] = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
; ATTRIBUTOR_HSA-NEXT: store volatile i8 addrspace(4)* [[DISPATCH_PTR]], i8 addrspace(4)* addrspace(1)* undef, align 8
|
||||
; ATTRIBUTOR_HSA-NEXT: ret i32 0
|
||||
;
|
||||
%dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr()
|
||||
store volatile i8 addrspace(4)* %dispatch.ptr, i8 addrspace(4)* addrspace(1)* undef
|
||||
@@ -500,11 +667,17 @@ define i32 @use_dispatch_ptr_ret_type() #1 {
|
||||
}
|
||||
|
||||
define float @func_indirect_use_dispatch_ptr_constexpr_cast_func() #1 {
|
||||
; HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
|
||||
; HSA-SAME: () #[[ATTR20]] {
|
||||
; HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
|
||||
; HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; HSA-NEXT: ret float [[FADD]]
|
||||
; AKF_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
|
||||
; AKF_HSA-SAME: () #[[ATTR20]] {
|
||||
; AKF_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
|
||||
; AKF_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; AKF_HSA-NEXT: ret float [[FADD]]
|
||||
;
|
||||
; ATTRIBUTOR_HSA-LABEL: define {{[^@]+}}@func_indirect_use_dispatch_ptr_constexpr_cast_func
|
||||
; ATTRIBUTOR_HSA-SAME: () #[[ATTR21]] {
|
||||
; ATTRIBUTOR_HSA-NEXT: [[F:%.*]] = call float bitcast (i32 ()* @use_dispatch_ptr_ret_type to float ()*)()
|
||||
; ATTRIBUTOR_HSA-NEXT: [[FADD:%.*]] = fadd float [[F]], 1.000000e+00
|
||||
; ATTRIBUTOR_HSA-NEXT: ret float [[FADD]]
|
||||
;
|
||||
%f = call float bitcast (i32()* @use_dispatch_ptr_ret_type to float()*)()
|
||||
%fadd = fadd float %f, 1.0
|
||||
@@ -517,25 +690,50 @@ attributes #2 = { nounwind "target-cpu"="gfx900" }
|
||||
attributes #3 = { nounwind }
|
||||
|
||||
;.
|
||||
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
|
||||
; HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
|
||||
; HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR18]] = { nounwind }
|
||||
; HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" }
|
||||
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" }
|
||||
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR15]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" }
|
||||
; AKF_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR17]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR18]] = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-work-item-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-x" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-group-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-dispatch-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-dispatch-id" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { noreturn nounwind readnone "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { noreturn nounwind "amdgpu-work-item-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-queue-ptr" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-kernarg-segment-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind "amdgpu-implicitarg-ptr" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR20]] = { nounwind "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { nounwind "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { nounwind }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { noreturn nounwind }
|
||||
;.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=HSA,AKF_HSA %s
|
||||
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-attributor < %s | FileCheck -check-prefixes=HSA,ATTRIBUTOR_HSA %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
|
||||
|
||||
@@ -478,18 +479,33 @@ attributes #0 = { nounwind readnone speculatable }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
;.
|
||||
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; HSA: attributes #[[ATTR1]] = { nounwind }
|
||||
; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
|
||||
; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
|
||||
; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
|
||||
; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
|
||||
; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
|
||||
; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
|
||||
; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
|
||||
; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
|
||||
; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
|
||||
; AKF_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; AKF_HSA: attributes #[[ATTR1]] = { nounwind }
|
||||
; AKF_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
|
||||
; AKF_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
|
||||
; AKF_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
|
||||
; AKF_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
|
||||
; AKF_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
|
||||
; AKF_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
|
||||
; AKF_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" }
|
||||
; AKF_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" }
|
||||
; AKF_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" }
|
||||
; AKF_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" }
|
||||
;.
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-dispatch-ptr" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-queue-ptr" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-kernarg-segment-ptr" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-stack-objects" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck %s
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
declare i32 @llvm.r600.read.tgid.x() #0
|
||||
declare i32 @llvm.r600.read.tgid.y() #0
|
||||
@@ -14,9 +15,10 @@ declare i32 @llvm.r600.read.local.size.y() #0
|
||||
declare i32 @llvm.r600.read.local.size.z() #0
|
||||
|
||||
define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_x(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_x
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tgid.x()
|
||||
@@ -25,9 +27,10 @@ define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tgid.y()
|
||||
@@ -36,9 +39,10 @@ define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @multi_use_tgid_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@multi_use_tgid_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -51,10 +55,11 @@ define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_x_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR2]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -66,9 +71,10 @@ define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tgid.z()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tgid.z()
|
||||
@@ -77,10 +83,11 @@ define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_x_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR3]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -92,10 +99,11 @@ define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_y_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_y_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.z()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -107,11 +115,12 @@ define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tgid_x_y_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tgid_x_y_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR4]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tgid.z()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -126,9 +135,10 @@ define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_x(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_x
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.x()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tidig.x()
|
||||
@@ -137,9 +147,10 @@ define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR5:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.y()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tidig.y()
|
||||
@@ -148,9 +159,10 @@ define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR6:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.tidig.z()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.tidig.z()
|
||||
@@ -159,10 +171,11 @@ define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_x_tgid_x(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_tgid_x
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -174,10 +187,11 @@ define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_y_tgid_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_y_tgid_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR7:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.y()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
@@ -189,11 +203,12 @@ define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_tidig_x_y_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_tidig_x_y_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR8:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
@@ -208,14 +223,15 @@ define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_all_workitems(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_all_workitems
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR9:[0-9]+]] {
|
||||
; CHECK-NEXT: [[VAL0:%.*]] = call i32 @llvm.r600.read.tidig.x()
|
||||
; CHECK-NEXT: [[VAL1:%.*]] = call i32 @llvm.r600.read.tidig.y()
|
||||
; CHECK-NEXT: [[VAL2:%.*]] = call i32 @llvm.r600.read.tidig.z()
|
||||
; CHECK-NEXT: [[VAL3:%.*]] = call i32 @llvm.r600.read.tgid.x()
|
||||
; CHECK-NEXT: [[VAL4:%.*]] = call i32 @llvm.r600.read.tgid.y()
|
||||
; CHECK-NEXT: [[VAL5:%.*]] = call i32 @llvm.r600.read.tgid.z()
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL0]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL1]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL2]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: store volatile i32 [[VAL3]], i32 addrspace(1)* [[PTR]], align 4
|
||||
@@ -239,9 +255,10 @@ define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_get_local_size_x(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_x
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.x()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.local.size.x()
|
||||
@@ -250,9 +267,10 @@ define amdgpu_kernel void @use_get_local_size_x(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_get_local_size_y(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_y
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.y()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.local.size.y()
|
||||
@@ -261,9 +279,10 @@ define amdgpu_kernel void @use_get_local_size_y(i32 addrspace(1)* %ptr) #1 {
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
|
||||
; CHECK-LABEL: @use_get_local_size_z(
|
||||
; CHECK-LABEL: define {{[^@]+}}@use_get_local_size_z
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[PTR:%.*]]) #[[ATTR1]] {
|
||||
; CHECK-NEXT: [[VAL:%.*]] = call i32 @llvm.r600.read.local.size.z()
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR:%.*]], align 4
|
||||
; CHECK-NEXT: store i32 [[VAL]], i32 addrspace(1)* [[PTR]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
%val = call i32 @llvm.r600.read.local.size.z()
|
||||
@@ -274,14 +293,46 @@ define amdgpu_kernel void @use_get_local_size_z(i32 addrspace(1)* %ptr) #1 {
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
||||
|
||||
; HSA: attributes #0 = { nounwind readnone }
|
||||
; HSA: attributes #1 = { nounwind }
|
||||
; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" }
|
||||
; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" }
|
||||
; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
|
||||
; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" }
|
||||
; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
|
||||
; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" }
|
||||
; ALL: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ALL: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; NOHSA: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { nounwind }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" }
|
||||
; AKF_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" }
|
||||
; AKF_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" }
|
||||
; AKF_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" }
|
||||
; AKF_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" }
|
||||
; AKF_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" }
|
||||
; AKF_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-work-group-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=GCN,AKF_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor < %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s
|
||||
|
||||
define internal void @indirect() {
|
||||
; GCN-LABEL: define {{[^@]+}}@indirect
|
||||
@@ -10,13 +11,20 @@ define internal void @indirect() {
|
||||
}
|
||||
|
||||
define internal void @direct() {
|
||||
; GCN-LABEL: define {{[^@]+}}@direct
|
||||
; GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; GCN-NEXT: call void [[FP]]()
|
||||
; GCN-NEXT: ret void
|
||||
; AKF_GCN-LABEL: define {{[^@]+}}@direct
|
||||
; AKF_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; AKF_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; AKF_GCN-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; AKF_GCN-NEXT: call void [[FP]]()
|
||||
; AKF_GCN-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@direct
|
||||
; ATTRIBUTOR_GCN-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_GCN-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; ATTRIBUTOR_GCN-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; ATTRIBUTOR_GCN-NEXT: call void @indirect()
|
||||
; ATTRIBUTOR_GCN-NEXT: ret void
|
||||
;
|
||||
%fptr = alloca void()*
|
||||
store void()* @indirect, void()** %fptr
|
||||
@@ -35,7 +43,11 @@ define amdgpu_kernel void @test_direct_indirect_call() {
|
||||
ret void
|
||||
}
|
||||
;.
|
||||
; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; AKF_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR2]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,11 +1,15 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=GCN,AKF_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s
|
||||
|
||||
define internal void @indirect() {
|
||||
; GCN-LABEL: define {{[^@]+}}@indirect
|
||||
; GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; GCN-NEXT: ret void
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@indirect
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -18,6 +22,13 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
|
||||
; GCN-NEXT: call void [[FP]]()
|
||||
; GCN-NEXT: ret void
|
||||
;
|
||||
; CHECK-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: [[FPTR:%.*]] = alloca void ()*, align 8
|
||||
; CHECK-NEXT: store void ()* @indirect, void ()** [[FPTR]], align 8
|
||||
; CHECK-NEXT: [[FP:%.*]] = load void ()*, void ()** [[FPTR]], align 8
|
||||
; CHECK-NEXT: call void [[FP]]()
|
||||
; CHECK-NEXT: ret void
|
||||
%fptr = alloca void()*
|
||||
store void()* @indirect, void()** %fptr
|
||||
%fp = load void()*, void()** %fptr
|
||||
@@ -28,6 +39,9 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
|
||||
attributes #0 = { "amdgpu-dispatch-id" }
|
||||
|
||||
;.
|
||||
; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; Check that no attributes are added to graphics functions
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=AKF_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -amdgpu-attributor %s | FileCheck -check-prefixes=ATTRIBUTOR_GCN %s
|
||||
|
||||
; Check that it doesn't crash
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
|
||||
@@ -9,8 +10,22 @@
|
||||
|
||||
target datalayout = "A5"
|
||||
|
||||
|
||||
define amdgpu_cs void @test_simple_indirect_call() {
|
||||
; GCN-LABEL: define amdgpu_cs void @test_simple_indirect_call() {
|
||||
; AKF_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call() {
|
||||
; AKF_GCN-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
|
||||
; AKF_GCN-NEXT: [[FUN:%.*]] = inttoptr i64 [[PC]] to void ()*
|
||||
; AKF_GCN-NEXT: call amdgpu_gfx void [[FUN]]()
|
||||
; AKF_GCN-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_GCN-LABEL: define {{[^@]+}}@test_simple_indirect_call
|
||||
; ATTRIBUTOR_GCN-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_GCN-NEXT: [[PC:%.*]] = call i64 @llvm.amdgcn.s.getpc()
|
||||
; ATTRIBUTOR_GCN-NEXT: [[FUN:%.*]] = inttoptr i64 [[PC]] to void ()*
|
||||
; ATTRIBUTOR_GCN-NEXT: call amdgpu_gfx void [[FUN]]()
|
||||
; ATTRIBUTOR_GCN-NEXT: ret void
|
||||
;
|
||||
; Attributor adds work-group-size attribute. This should be ok.
|
||||
; GFX9-LABEL: test_simple_indirect_call:
|
||||
; GFX9: ; %bb.0:
|
||||
; GFX9-NEXT: s_getpc_b64 s[36:37]
|
||||
@@ -25,7 +40,6 @@ define amdgpu_cs void @test_simple_indirect_call() {
|
||||
; GFX9-NEXT: s_mov_b64 s[2:3], s[38:39]
|
||||
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
|
||||
; GFX9-NEXT: s_endpgm
|
||||
;
|
||||
; GFX10-LABEL: test_simple_indirect_call:
|
||||
; GFX10: ; %bb.0:
|
||||
; GFX10-NEXT: s_getpc_b64 s[36:37]
|
||||
@@ -53,3 +67,9 @@ define amdgpu_cs void @test_simple_indirect_call() {
|
||||
declare i64 @llvm.amdgcn.s.getpc() #0
|
||||
|
||||
attributes #0 = { nounwind readnone speculatable willreturn }
|
||||
;.
|
||||
; AKF_GCN: attributes #[[ATTR0:[0-9]+]] = { nounwind readnone speculatable willreturn }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1:[0-9]+]] = { nounwind readnone speculatable willreturn "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=GCN,AKF_GCN %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -check-prefixes=GCN,ATTRIBUTOR_GCN %s
|
||||
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
@@ -59,9 +60,10 @@ define amdgpu_kernel void @test_simple_indirect_call() {
|
||||
ret void
|
||||
}
|
||||
|
||||
; attributes #0 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; attributes #1 = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
; AKF_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" }
|
||||
;.
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-calls" "amdgpu-dispatch-id" "amdgpu-dispatch-ptr" "amdgpu-implicitarg-ptr" "amdgpu-queue-ptr" "amdgpu-stack-objects" "amdgpu-work-group-id-x" "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-x" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
|
||||
@@ -1,29 +1,46 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
; If the kernel does not have the uniform-work-group-attribute, set both callee and caller as false
|
||||
; We write to a global so that the attributor don't deletes the function.
|
||||
|
||||
@x = global i32 0
|
||||
|
||||
;.
|
||||
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @foo() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@foo
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @foo()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @foo()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @foo() #[[ATTR2:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @foo()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly }
|
||||
;.
|
||||
|
||||
157
llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
Normal file
157
llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
Normal file
@@ -0,0 +1,157 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-unknown-unknown -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
;.
|
||||
; CHECK: @[[G1:[a-zA-Z0-9_$"\\.-]+]] = global i32* null
|
||||
; CHECK: @[[G2:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define weak void @weak() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@weak
|
||||
; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @internal1()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@weak
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal1() #[[ATTR5:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @internal1()
|
||||
ret void
|
||||
}
|
||||
|
||||
@G1 = global i32* null
|
||||
|
||||
define internal void @internal1() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal1
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @G1, align 8
|
||||
; AKF_CHECK-NEXT: store i32 0, i32* [[TMP1]], align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** @G1, align 8
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* [[TMP1]], align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = load i32*, i32** @G1
|
||||
store i32 0, i32* %1
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @weak()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @weak()
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @weak()
|
||||
ret void
|
||||
}
|
||||
|
||||
@G2 = global i32 0
|
||||
|
||||
define internal void @internal3() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal3
|
||||
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
|
||||
; AKF_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; AKF_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
|
||||
; AKF_CHECK: 3:
|
||||
; AKF_CHECK-NEXT: call void @internal4()
|
||||
; AKF_CHECK-NEXT: call void @internal3()
|
||||
; AKF_CHECK-NEXT: br label [[TMP4]]
|
||||
; AKF_CHECK: 4:
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @G2, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]]
|
||||
; ATTRIBUTOR_CHECK: 3:
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal4() #[[ATTR6:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal3() #[[ATTR7:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: br label [[TMP4]]
|
||||
; ATTRIBUTOR_CHECK: 4:
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
%1 = load i32, i32* @G2, align 4
|
||||
%2 = icmp eq i32 %1, 0
|
||||
br i1 %2, label %3, label %4
|
||||
3:
|
||||
call void @internal4()
|
||||
call void @internal3()
|
||||
br label %4
|
||||
4:
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @internal4() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal4
|
||||
; AKF_CHECK-SAME: () #[[ATTR2]] {
|
||||
; AKF_CHECK-NEXT: store i32 1, i32* @G2, align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal4
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR4:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 1, i32* @G2, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 1, i32* @G2, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @internal2() {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@internal2
|
||||
; AKF_CHECK-SAME: () #[[ATTR2]] {
|
||||
; AKF_CHECK-NEXT: call void @internal3()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@internal2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal3() #[[ATTR7]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @internal3()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel2() #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; AKF_CHECK-SAME: () #[[ATTR1]] {
|
||||
; AKF_CHECK-NEXT: call void @internal2()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @internal2() #[[ATTR5]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @internal2()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nofree nosync nounwind "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR7]] = { nofree nosync nounwind }
|
||||
;.
|
||||
@@ -1,39 +1,68 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=ATTRIBUTOR_CHECK %s
|
||||
|
||||
; Test to verify if the attribute gets propagated across nested function calls
|
||||
|
||||
; Added to prevent Attributor from deleting calls.
|
||||
@x = global i32 0
|
||||
|
||||
;.
|
||||
; AKF_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @func1() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; AKF_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func2() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func1()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: call void @func1()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel3() #2 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func2()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func2()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func2() #[[ATTR2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func2()
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly }
|
||||
;.
|
||||
|
||||
@@ -1,31 +1,51 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
; Function added to prevent attributor from deleting call sites.
|
||||
|
||||
; Two kernels with different values of the uniform-work-group-attribute call the same function
|
||||
@x = global i32 0
|
||||
|
||||
;.
|
||||
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @func() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR3:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR3]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func()
|
||||
ret void
|
||||
@@ -34,7 +54,12 @@ define amdgpu_kernel void @kernel2() #2 {
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind writeonly }
|
||||
;.
|
||||
|
||||
@@ -1,20 +1,33 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
@x = global i32 0
|
||||
|
||||
; Propagate the uniform-work-group-attribute from the kernel to callee if it doesn't have it
|
||||
;.
|
||||
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @func() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel1() #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel1
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func() #[[ATTR4:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func()
|
||||
ret void
|
||||
@@ -22,18 +35,30 @@ define amdgpu_kernel void @kernel1() #1 {
|
||||
|
||||
; External declaration of a function
|
||||
define weak_odr void @weak_func() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@weak_func
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@weak_func
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@weak_func
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel2() #2 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @weak_func()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; AKF_CHECK-SAME: () #[[ATTR2:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @weak_func()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR3:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @weak_func() #[[ATTR5:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @weak_func()
|
||||
ret void
|
||||
@@ -42,9 +67,15 @@ define amdgpu_kernel void @kernel2() #2 {
|
||||
attributes #0 = { nounwind }
|
||||
attributes #1 = { "uniform-work-group-size"="false" }
|
||||
attributes #2 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nounwind writeonly }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR5]] = { nounwind }
|
||||
;.
|
||||
|
||||
@@ -1,26 +1,44 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-annotate-kernel-features %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd- -amdgpu-attributor %s | FileCheck --allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
; Test to ensure recursive functions exhibit proper behaviour
|
||||
; Test to generate fibonacci numbers
|
||||
|
||||
define i32 @fib(i32 %n) #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@fib
|
||||
; CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; CHECK: cont1:
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; CHECK: cont2:
|
||||
; CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
|
||||
; CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
|
||||
; CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; CHECK: exit:
|
||||
; CHECK-NEXT: ret i32 1
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@fib
|
||||
; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; AKF_CHECK: cont1:
|
||||
; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; AKF_CHECK: cont2:
|
||||
; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]])
|
||||
; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]])
|
||||
; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; AKF_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; AKF_CHECK: exit:
|
||||
; AKF_CHECK-NEXT: ret i32 1
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib
|
||||
; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont1:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont2:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib(i32 [[NM1]]) #[[ATTR3:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib(i32 [[NM2]]) #[[ATTR3]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; ATTRIBUTOR_CHECK: exit:
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 1
|
||||
;
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %exit, label %cont1
|
||||
@@ -42,21 +60,97 @@ exit:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
define internal i32 @fib_internal(i32 %n) #0 {
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@fib_internal
|
||||
; AKF_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; AKF_CHECK: cont1:
|
||||
; AKF_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; AKF_CHECK: cont2:
|
||||
; AKF_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; AKF_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]])
|
||||
; AKF_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; AKF_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]])
|
||||
; AKF_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; AKF_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; AKF_CHECK: exit:
|
||||
; AKF_CHECK-NEXT: ret i32 1
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@fib_internal
|
||||
; ATTRIBUTOR_CHECK-SAME: (i32 [[N:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[N]], 0
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP1]], label [[EXIT:%.*]], label [[CONT1:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont1:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: br i1 [[CMP2]], label [[EXIT]], label [[CONT2:%.*]]
|
||||
; ATTRIBUTOR_CHECK: cont2:
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM1:%.*]] = sub i32 [[N]], 1
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM1:%.*]] = call i32 @fib_internal(i32 [[NM1]]) #[[ATTR4:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[NM2:%.*]] = sub i32 [[N]], 2
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[FIBM2:%.*]] = call i32 @fib_internal(i32 [[NM2]]) #[[ATTR4]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[RETVAL:%.*]] = add i32 [[FIBM1]], [[FIBM2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 [[RETVAL]]
|
||||
; ATTRIBUTOR_CHECK: exit:
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret i32 1
|
||||
;
|
||||
%cmp1 = icmp eq i32 %n, 0
|
||||
br i1 %cmp1, label %exit, label %cont1
|
||||
|
||||
cont1:
|
||||
%cmp2 = icmp eq i32 %n, 1
|
||||
br i1 %cmp2, label %exit, label %cont2
|
||||
|
||||
cont2:
|
||||
%nm1 = sub i32 %n, 1
|
||||
%fibm1 = call i32 @fib_internal(i32 %nm1)
|
||||
%nm2 = sub i32 %n, 2
|
||||
%fibm2 = call i32 @fib_internal(i32 %nm2)
|
||||
%retval = add i32 %fibm1, %fibm2
|
||||
|
||||
ret i32 %retval
|
||||
|
||||
exit:
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel(i32 addrspace(1)* %m) #1 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
|
||||
; CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; AKF_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5)
|
||||
; AKF_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 5)
|
||||
; AKF_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; AKF_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel
|
||||
; ATTRIBUTOR_CHECK-SAME: (i32 addrspace(1)* [[M:%.*]]) #[[ATTR2:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[R:%.*]] = call i32 @fib(i32 5) #[[ATTR3]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: [[R2:%.*]] = call i32 @fib_internal(i32 noundef 5) #[[ATTR3]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R]], i32 addrspace(1)* [[M]], align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: store i32 [[R2]], i32 addrspace(1)* [[M]], align 4
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
%r = call i32 @fib(i32 5)
|
||||
%r2 = call i32 @fib_internal(i32 5)
|
||||
|
||||
store i32 %r, i32 addrspace(1)* %m
|
||||
store i32 %r2, i32 addrspace(1)* %m
|
||||
ret void
|
||||
}
|
||||
|
||||
; nounwind and readnone are added to match attributor results.
|
||||
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { "uniform-work-group-size"="true" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="true" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="true" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind readnone "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { nofree nosync nounwind readnone "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { "amdgpu-calls" "uniform-work-group-size"="true" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR3]] = { nounwind readnone }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR4]] = { nofree nounwind readnone }
|
||||
;.
|
||||
|
||||
@@ -1,28 +1,43 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-annotate-kernel-features %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,AKF_CHECK %s
|
||||
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-attributor %s | FileCheck -allow-unused-prefixes -check-prefixes=CHECK,ATTRIBUTOR_CHECK %s
|
||||
|
||||
@x = global i32 0
|
||||
;.
|
||||
; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = global i32 0
|
||||
;.
|
||||
define void @func1() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func1
|
||||
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func4() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func4
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: store i32 0, i32* @x, align 4
|
||||
; CHECK-NEXT: ret void
|
||||
;
|
||||
store i32 0, i32* @x
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @func2() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func4()
|
||||
; CHECK-NEXT: call void @func1()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: call void @func4()
|
||||
; AKF_CHECK-NEXT: call void @func1()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func2
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func4() #[[ATTR2:[0-9]+]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func4()
|
||||
call void @func1()
|
||||
@@ -30,21 +45,32 @@ define void @func2() #0 {
|
||||
}
|
||||
|
||||
define void @func3() {
|
||||
; CHECK-LABEL: define {{[^@]+}}@func3
|
||||
; CHECK-SAME: () #[[ATTR0]] {
|
||||
; CHECK-NEXT: call void @func1()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@func3
|
||||
; AKF_CHECK-SAME: () #[[ATTR0]] {
|
||||
; AKF_CHECK-NEXT: call void @func1()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@func3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR0]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func1() #[[ATTR2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func1()
|
||||
ret void
|
||||
}
|
||||
|
||||
define amdgpu_kernel void @kernel3() #0 {
|
||||
; CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; CHECK-NEXT: call void @func2()
|
||||
; CHECK-NEXT: call void @func3()
|
||||
; CHECK-NEXT: ret void
|
||||
; AKF_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; AKF_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; AKF_CHECK-NEXT: call void @func2()
|
||||
; AKF_CHECK-NEXT: call void @func3()
|
||||
; AKF_CHECK-NEXT: ret void
|
||||
;
|
||||
; ATTRIBUTOR_CHECK-LABEL: define {{[^@]+}}@kernel3
|
||||
; ATTRIBUTOR_CHECK-SAME: () #[[ATTR1:[0-9]+]] {
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func2() #[[ATTR2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: call void @func3() #[[ATTR2]]
|
||||
; ATTRIBUTOR_CHECK-NEXT: ret void
|
||||
;
|
||||
call void @func2()
|
||||
call void @func3()
|
||||
@@ -52,8 +78,11 @@ define amdgpu_kernel void @kernel3() #0 {
|
||||
}
|
||||
|
||||
attributes #0 = { "uniform-work-group-size"="false" }
|
||||
|
||||
;.
|
||||
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
|
||||
; AKF_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
;.
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR0]] = { nounwind writeonly "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR1]] = { "amdgpu-calls" "uniform-work-group-size"="false" }
|
||||
; ATTRIBUTOR_CHECK: attributes #[[ATTR2]] = { nounwind writeonly }
|
||||
;.
|
||||
|
||||
Reference in New Issue
Block a user