Files
intel-graphics-compiler/IGC/Compiler/CISACodeGen/Emu64OpsPass.cpp
Wesierski, Lukasz 17c1de9abc Remove igc metrics
Remove igc metrics
2025-09-22 20:11:41 +02:00

2161 lines
71 KiB
C++

/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#include "common/LLVMWarningsPush.hpp"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvmWrapper/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvmWrapper/IR/DerivedTypes.h"
#include "llvmWrapper/IR/Instructions.h"
#include "llvmWrapper/IR/Intrinsics.h"
#include "llvmWrapper/Support/Alignment.h"
#include "common/LLVMWarningsPop.hpp"
#include "common/LLVMUtils.h"
#include "common/IGCIRBuilder.h"
#include "GenISAIntrinsics/GenIntrinsics.h"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/IGCPassSupport.h"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "Compiler/CISACodeGen/Emu64OpsPass.h"
#include "Probe/Assertion.h"
using namespace llvm;
using namespace IGC;
using namespace IGC::IGCMD;
using std::ldexp;
namespace {
typedef llvm::IGCIRBuilder<TargetFolder> BuilderType;
typedef std::pair<Value *, Value *> ValuePair;
class InstExpander;
class Preprocessor;
/// Function pass that emulates 64-bit integer operations by tracking every
/// i64 value as a (Lo, Hi) pair of i32 values.
///
/// runOnFunction() drives the pipeline: pointer/overflow preprocessing,
/// argument splitting, PHI preparation, per-instruction expansion, PHI
/// population and finally removal of the instructions that were replaced.
/// InstExpander and Preprocessor are friends so they can reach the shared
/// builder, value map and target queries kept here.
class Emu64Ops : public FunctionPass {
  friend class InstExpander;
  friend class Preprocessor;

  // Per-function state; all of it is (re)initialized in runOnFunction().
  const DataLayout *DL;
  IGC::CodeGenContext *CGC;
  BuilderType *IRB;
  InstExpander *Expander;
  LLVMContext *TheContext;
  Module *TheModule;
  Function *TheFunction;

  // Maps an original 64-bit value to its expanded (Lo, Hi) pair.
  typedef DenseMap<Value *, ValuePair> ValueMapTy;
  ValueMapTy ValueMap;

  // Special bitcasts of 64-bit arguments, which need special handling as we
  // cannot replace argument type.
  SmallPtrSet<BitCastInst *, 8> Arg64Casts;
  // Instructions that were expanded and are erased by removeDeadInsts().
  SmallPtrSet<Instruction *, 32> DeadInsts;
  SmallPtrSet<Instruction *, 16> Int64Insts;

  // Reset to false before each instruction is expanded (see expandInsts()).
  bool doNotDeleteInstr = false;

public:
  static char ID;

  Emu64Ops()
      : FunctionPass(ID), DL(nullptr), CGC(nullptr), IRB(nullptr), Expander(nullptr), TheContext(nullptr),
        TheModule(nullptr), TheFunction(nullptr) {
    initializeEmu64OpsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "Emu64Ops"; }

private:
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<CodeGenContextWrapper>();
    AU.addRequired<MetaDataUtilsWrapper>();
  }

  LLVMContext *getContext() const { return TheContext; }
  Module *getModule() const { return TheModule; }
  Function *getFunction() const { return TheFunction; }

  /// True when the default, global or constant address space uses 64-bit
  /// pointers according to the data layout.
  bool hasPtr64() const {
    return (DL->getPointerSizeInBits() == 64 || DL->getPointerSizeInBits(ADDRESS_SPACE_GLOBAL) == 64 ||
            DL->getPointerSizeInBits(ADDRESS_SPACE_CONSTANT) == 64);
  }

  /// True when pointers in PtrTy's address space occupy 64 bits in registers.
  bool isPtr64(const PointerType *PtrTy) const {
    return CGC->getRegisterPointerSizeInBits(PtrTy->getAddressSpace()) == 64;
  }

  bool isInt64(const Type *Ty) const { return Ty->isIntegerTy(64); }
  bool isInt64(const Value *V) const { return isInt64(V->getType()); }

  bool isArg64Cast(BitCastInst *BC) const { return Arg64Casts.count(BC) != 0; }

  /// Returns the vector type <2*NumElts x i32>.
  Type *getV2Int32Ty(unsigned NumElts = 1) const {
    return IGCLLVM::FixedVectorType::get(IRB->getInt32Ty(), NumElts * 2);
  }

  ValuePair getExpandedValues(Value *V);
  void setExpandedValues(Value *V, Value *Lo, Value *Hi);

  /// Alignment of a load, falling back to the ABI alignment of the loaded
  /// type when the instruction reports 0.
  alignment_t getAlignment(LoadInst *LD) const {
    auto Align = IGCLLVM::getAlignmentValue(LD);
    if (Align == 0)
      Align = DL->getABITypeAlign(LD->getType()).value();
    return Align;
  }

  /// Alignment of a store, falling back to the ABI alignment of the stored
  /// value's type when the instruction reports 0. Mirrors the load overload so
  /// both dupMemoryAttribute() variants never feed a zero alignment to
  /// MinAlign().
  alignment_t getAlignment(StoreInst *ST) const {
    auto Align = IGCLLVM::getAlignmentValue(ST);
    if (Align == 0)
      Align = DL->getABITypeAlign(ST->getValueOperand()->getType()).value();
    return Align;
  }

  /// Copies the metadata kinds this pass knows how to preserve from OldI to
  /// NewI. Currently that is only "lsc.cache.ctrl".
  void copyKnownMetadata(Instruction *NewI, Instruction *OldI) const {
    unsigned LscCacheCtrlID = OldI->getContext().getMDKindID("lsc.cache.ctrl");
    SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
    OldI->getAllMetadata(MD);
    for (const auto &MDPair : MD) {
      unsigned ID = MDPair.first;
      MDNode *N = MDPair.second;
      if (ID == LscCacheCtrlID)
        NewI->setMetadata(ID, N);
    }
  }

  /// Copies volatility, ordering, sync scope and known metadata from RefLD to
  /// NewLD. The alignment is MinAlign(alignment of RefLD, Off); Off is
  /// presumably the byte offset of the new access from the original one.
  void dupMemoryAttribute(LoadInst *NewLD, LoadInst *RefLD, unsigned Off) const {
    auto alignment = getAlignment(RefLD);
    NewLD->setVolatile(RefLD->isVolatile());
    NewLD->setAlignment(IGCLLVM::getAlign(MinAlign(alignment, Off)));
    NewLD->setOrdering(RefLD->getOrdering());
    NewLD->setSyncScopeID(RefLD->getSyncScopeID());
    copyKnownMetadata(NewLD, RefLD);
  }

  /// Store counterpart of the load overload above.
  void dupMemoryAttribute(StoreInst *NewST, StoreInst *RefST, unsigned Off) const {
    // Use the same ABI-alignment fallback as the load path.
    auto alignment = getAlignment(RefST);
    NewST->setVolatile(RefST->isVolatile());
    NewST->setAlignment(IGCLLVM::getAlign(MinAlign(alignment, Off)));
    NewST->setOrdering(RefST->getOrdering());
    NewST->setSyncScopeID(RefST->getSyncScopeID());
    copyKnownMetadata(NewST, RefST);
  }

  // Pipeline stages, invoked in this order by runOnFunction().
  bool expandArguments(Function &F);
  bool preparePHIs(Function &F);
  bool expandInsts(Function &F);
  bool populatePHIs(Function &F);
  bool removeDeadInsts();

  bool hasOpcodeInt64Support(unsigned int opcode) const;

  void addInstToInt64InstrList(Instruction *inst) { Int64Insts.insert(inst); }
  bool eraseInstrFromInt64InstrList(Instruction *inst) { return Int64Insts.erase(inst); }
};
// Instruction visitor that rewrites one instruction at a time into its
// (Lo, Hi) i32 form. Each visitXxx handler returns true when it expanded the
// instruction (the caller then queues the original for deletion) and false
// when the instruction is left untouched. Handlers defined inline below as
// `return false` never expand their instruction kind.
class InstExpander : public InstVisitor<InstExpander, bool> {
friend class InstVisitor<InstExpander, bool>;
// Owning pass; gives access to the value map and target queries.
Emu64Ops *Emu;
// Builder shared with the owning pass.
BuilderType *IRB;
public:
InstExpander(Emu64Ops *E, BuilderType *B) : Emu(E), IRB(B) {}
// Sets the builder's insertion point to I and dispatches to the matching
// visit handler.
bool expand(Instruction *I);
private:
// Fallback for instruction kinds without a dedicated handler.
bool visitInstruction(Instruction &);
// Terminators.
bool visitRet(ReturnInst &);
bool visitBr(BranchInst &) { return false; }
bool visitSwitch(SwitchInst &) { return false; }
bool visitIndirectBr(IndirectBrInst &) { return false; }
bool visitInvoke(InvokeInst &) { return false; }
bool visitResume(ResumeInst &) { return false; }
bool visitUnreachable(UnreachableInst &) { return false; }
bool visitFreeze(FreezeInst &);
// Arithmetic; the floating-point variants are never expanded.
bool visitAdd(BinaryOperator &);
bool visitFAdd(BinaryOperator &) { return false; }
bool visitSub(BinaryOperator &);
bool visitFSub(BinaryOperator &) { return false; }
bool visitMul(BinaryOperator &);
bool visitFMul(BinaryOperator &) { return false; }
bool visitSDiv(BinaryOperator &);
bool visitUDiv(BinaryOperator &);
bool visitFDiv(BinaryOperator &) { return false; }
bool visitSRem(BinaryOperator &);
bool visitURem(BinaryOperator &);
bool visitFRem(BinaryOperator &) { return false; }
bool visitFNeg(UnaryOperator &) { return false; }
// Shifts and bitwise logic.
bool visitShl(BinaryOperator &);
bool visitLShr(BinaryOperator &);
bool visitAShr(BinaryOperator &);
bool visitAnd(BinaryOperator &);
bool visitOr(BinaryOperator &);
bool visitXor(BinaryOperator &);
// Memory.
bool visitAlloca(AllocaInst &) { return false; }
bool visitLoad(LoadInst &);
bool visitStore(StoreInst &);
bool visitGetElementPtr(GetElementPtrInst &) { return false; }
bool visitFence(FenceInst &) { return false; }
bool visitAtomicCmpXchg(AtomicCmpXchgInst &);
bool visitAtomicRMW(AtomicRMWInst &);
// Casts.
bool visitTrunc(TruncInst &);
bool visitSExt(SExtInst &);
bool visitZExt(ZExtInst &);
bool visitFPToUI(FPToUIInst &);
bool visitFPToSI(FPToSIInst &);
bool visitUIToFP(UIToFPInst &);
bool visitSIToFP(SIToFPInst &);
bool visitFPTrunc(FPTruncInst &) { return false; }
bool visitFPExt(FPExtInst &) { return false; }
bool visitPtrToInt(PtrToIntInst &);
bool visitIntToPtr(IntToPtrInst &);
bool visitBitCast(BitCastInst &);
bool visitAddrSpaceCast(AddrSpaceCastInst &) { return false; }
// Comparisons, PHIs, calls and aggregate/vector operations.
bool visitICmp(ICmpInst &);
bool visitFCmp(FCmpInst &) { return false; }
bool visitPHI(PHINode &);
bool visitCall(CallInst &);
bool visitSelect(SelectInst &);
bool visitVAArg(VAArgInst &);
bool visitExtractElement(ExtractElementInst &);
bool visitInsertElement(InsertElementInst &);
bool visitShuffleVector(ShuffleVectorInst &) { return false; }
bool visitExtractValue(ExtractValueInst &);
bool visitInsertValue(InsertValueInst &);
bool visitLandingPad(LandingPadInst &);
// Helpers; their definitions are elsewhere in this file.
Value *convertUIToFP32(Type *DstTy, Value *Lo, Value *Hi, Instruction *Pos);
bool hasHWSupport(BinaryOperator &);
bool needsLoHiSplitting(Instruction &);
};
/// Rewrites IR patterns that the expander cannot handle directly, so that the
/// later stages only have to deal with plain i64 values:
///   * 64-bit `uadd.with.overflow` is replaced by an add plus an unsigned
///     compare;
///   * when the target has 64-bit pointers, pointer compares, selects, loads,
///     stores and int<->ptr casts are rewritten to go through i64.
class Preprocessor {
  Emu64Ops *Emu;
  BuilderType *IRB;

public:
  Preprocessor(Emu64Ops *E, BuilderType *B) : Emu(E), IRB(B) {}

  /// Runs both preprocessing steps over F.
  /// \returns true if any instruction was added, changed or removed.
  bool preprocess(Function &F) {
    bool Changed = false;

    // Preprocess additions with overflow.
    for (auto &BB : F) {
      for (auto BI = BB.begin(), BE = BB.end(); BI != BE; /*EMPTY*/) {
        IntrinsicInst *II = dyn_cast<IntrinsicInst>(BI);
        if (!II || II->getIntrinsicID() != Intrinsic::uadd_with_overflow ||
            !II->getArgOperand(0)->getType()->isIntegerTy(64)) {
          ++BI;
          continue;
        }

        IRB->SetInsertPoint(II);
        Value *LHS = II->getArgOperand(0);
        Value *RHS = II->getArgOperand(1);
        Value *Res = IRB->CreateAdd(LHS, RHS);
        // Unsigned overflow happened iff the wrapped sum is below an operand.
        Value *Overflow = IRB->CreateICmpULT(Res, LHS);

        // Every user is expected to be an extractvalue with a single index
        // (0 = sum, 1 = overflow bit). Advance the iterator before erasing.
        for (auto UI = II->user_begin(), UE = II->user_end(); UI != UE; /*EMPTY*/) {
          User *U = *UI++;
          ExtractValueInst *Ex = cast<ExtractValueInst>(U);
          IGC_ASSERT(nullptr != Ex);
          IGC_ASSERT(Ex->getNumIndices() == 1);
          unsigned Idx = *Ex->idx_begin();
          IGC_ASSERT(Idx == 0 || Idx == 1);
          Ex->replaceAllUsesWith((Idx == 0) ? Res : Overflow);
          Ex->eraseFromParent();
        }
        IGC_ASSERT(II->user_empty());

        // Step past the intrinsic before erasing it.
        ++BI;
        II->eraseFromParent();
        Changed = true;
      }
    }

    // Preprocess non-LOAD/-STORE pointer usage if there's 64-bit pointer.
    IGC_ASSERT(nullptr != Emu);
    if (Emu->hasPtr64()) {
      for (auto &BB : F) {
        // Replaced instructions are erased after the block has been scanned
        // so the block iterator stays valid.
        SmallVector<Instruction *, 16> LocalDeadInsts;
        for (auto BI = BB.begin(), BE = BB.end(); BI != BE; ++BI) {
          // NOTE: `continue` inside the cases below skips to the next
          // instruction of the enclosing for-loop.
          switch (BI->getOpcode()) {
          default: // By default, NOTHING!
            break;
          case Instruction::ICmp: {
            ICmpInst *Cmp = cast<ICmpInst>(BI);
            IGC_ASSERT(nullptr != Cmp);
            PointerType *PtrTy = dyn_cast<PointerType>(Cmp->getOperand(0)->getType());
            if (!PtrTy || !Emu->isPtr64(PtrTy))
              continue;
            // Compare the pointers as i64 integers instead.
            IRB->SetInsertPoint(Cmp);
            Value *LHS = Cmp->getOperand(0);
            Value *RHS = Cmp->getOperand(1);
            LHS = IRB->CreatePtrToInt(LHS, IRB->getInt64Ty());
            RHS = IRB->CreatePtrToInt(RHS, IRB->getInt64Ty());
            Cmp->setOperand(0, LHS);
            Cmp->setOperand(1, RHS);
            Changed = true;
            break;
          }
          case Instruction::Select: {
            SelectInst *SI = cast<SelectInst>(BI);
            PointerType *PtrTy = dyn_cast<PointerType>(SI->getType());
            if (!PtrTy || !Emu->isPtr64(PtrTy))
              continue;
            // Select between the i64 images of the pointers, then cast back.
            IRB->SetInsertPoint(SI);
            Value *TVal = SI->getTrueValue();
            Value *FVal = SI->getFalseValue();
            TVal = IRB->CreatePtrToInt(TVal, IRB->getInt64Ty());
            FVal = IRB->CreatePtrToInt(FVal, IRB->getInt64Ty());
            Value *NewPtr = IRB->CreateSelect(SI->getCondition(), TVal, FVal);
            NewPtr = IRB->CreateIntToPtr(NewPtr, PtrTy);
            SI->replaceAllUsesWith(NewPtr);
            LocalDeadInsts.push_back(SI);
            Changed = true;
            break;
          }
          case Instruction::Load: {
            LoadInst *LD = cast<LoadInst>(BI);
            PointerType *PtrTy = dyn_cast<PointerType>(LD->getType());
            if (!PtrTy || !Emu->isPtr64(PtrTy))
              continue;
            IRB->SetInsertPoint(LD);
            // Cast the address operand from pointer-to-pointer to
            // pointer-to-i64, keeping its address space.
            Value *OldPtr = LD->getPointerOperand();
            PointerType *OldPtrTy = cast<PointerType>(OldPtr->getType());
            PointerType *NewPtrTy = IRB->getInt64Ty()->getPointerTo(OldPtrTy->getAddressSpace());
            Value *NewPtr = IRB->CreateBitCast(OldPtr, NewPtrTy);
            // Create new load.
            LoadInst *NewLD = IRB->CreateLoad(IRB->getInt64Ty(), NewPtr);
            Emu->dupMemoryAttribute(NewLD, LD, 0);
            // Cast the loaded i64 back to the original pointer type.
            Value *NewVal = IRB->CreateIntToPtr(NewLD, PtrTy);
            LD->replaceAllUsesWith(NewVal);
            LocalDeadInsts.push_back(LD);
            Changed = true;
            break;
          }
          case Instruction::Store: {
            StoreInst *ST = cast<StoreInst>(BI);
            PointerType *PtrTy = dyn_cast<PointerType>(ST->getValueOperand()->getType());
            if (!PtrTy || !Emu->isPtr64(PtrTy))
              continue;
            IRB->SetInsertPoint(ST);
            // Cast the address operand from pointer-to-pointer to
            // pointer-to-i64, keeping its address space.
            Value *OldPtr = ST->getPointerOperand();
            PointerType *OldPtrTy = cast<PointerType>(OldPtr->getType());
            PointerType *NewPtrTy = IRB->getInt64Ty()->getPointerTo(OldPtrTy->getAddressSpace());
            Value *NewPtr = IRB->CreateBitCast(OldPtr, NewPtrTy);
            // Cast the pointer to be stored into i64.
            Value *OldVal = ST->getValueOperand();
            Value *NewVal = IRB->CreatePtrToInt(OldVal, IRB->getInt64Ty());
            // Create new store.
            StoreInst *NewST = IRB->CreateStore(NewVal, NewPtr);
            Emu->dupMemoryAttribute(NewST, ST, 0);
            LocalDeadInsts.push_back(ST);
            Changed = true;
            break;
          }
          case Instruction::IntToPtr: {
            IntToPtrInst *I2P = cast<IntToPtrInst>(BI);
            Value *Src = I2P->getOperand(0);
            PointerType *PtrTy = cast<PointerType>(I2P->getType());
            if (!Emu->isPtr64(PtrTy) && !Emu->isInt64(Src))
              continue;
            // Make the integer source exactly as wide as the pointer.
            IRB->SetInsertPoint(I2P);
            unsigned int ptrSize = Emu->CGC->getRegisterPointerSizeInBits(PtrTy->getAddressSpace());
            Src = IRB->CreateZExtOrTrunc(Src, IRB->getIntNTy(ptrSize));
            I2P->setOperand(0, Src);
            Changed = true;
            break;
          }
          case Instruction::PtrToInt: {
            PtrToIntInst *P2I = cast<PtrToIntInst>(BI);
            Value *Src = P2I->getOperand(0);
            PointerType *PtrTy = cast<PointerType>(Src->getType());
            // Nothing to do when the pointer is not 64-bit or when the cast
            // already produces i64. The test must look at the result: the
            // source is a pointer and can never be an i64.
            if (!Emu->isPtr64(PtrTy) || Emu->isInt64(P2I))
              continue;
            // Convert to i64 first, then resize to the requested width.
            IRB->SetInsertPoint(P2I);
            Value *NewVal = IRB->CreatePtrToInt(Src, IRB->getInt64Ty());
            NewVal = IRB->CreateZExtOrTrunc(NewVal, P2I->getType());
            P2I->replaceAllUsesWith(NewVal);
            LocalDeadInsts.push_back(P2I);
            Changed = true;
            break;
          }
          }
        }
        // Remove dead instructions.
        for (auto I : LocalDeadInsts)
          I->eraseFromParent();
      }
    }
    return Changed;
  }
};
// Definition of the pass's static ID member; the constructor hands it to
// FunctionPass.
char Emu64Ops::ID = 0;
} // End anonymous namespace
// Factory for the pass. Returns a heap-allocated Emu64Ops instance;
// presumably the pass manager it is added to takes ownership - confirm at the
// call site.
FunctionPass *createEmu64OpsPass() { return new Emu64Ops(); }
// Pass registration. The two dependencies listed here are the same analyses
// requested in Emu64Ops::getAnalysisUsage().
#define PASS_FLAG "igc-emu64ops"
#define PASS_DESC "IGC 64-bit ops emulation"
#define PASS_CFG_ONLY false
#define PASS_ANALYSIS false
IGC_INITIALIZE_PASS_BEGIN(Emu64Ops, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
IGC_INITIALIZE_PASS_END(Emu64Ops, PASS_FLAG, PASS_DESC, PASS_CFG_ONLY, PASS_ANALYSIS)
/// Pass entry point: sets up the per-function state and runs all emulation
/// stages on F.
/// \returns true if any stage modified the function.
bool Emu64Ops::runOnFunction(Function &F) {
  // Functions without an entry in the metadata function-info table are
  // skipped entirely.
  MetaDataUtils *MDU = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
  IGC_ASSERT(nullptr != MDU);
  if (MDU->findFunctionsInfoItem(&F) == MDU->end_FunctionsInfo())
    return false;

  // The data layout must be known before the builder is created, because the
  // builder's constant folder is constructed from it.
  DL = &F.getParent()->getDataLayout();
  CGC = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();

  // Builder and helpers live on the stack for the duration of this call; the
  // members below only borrow them.
  BuilderType TheBuilder(F.getContext(), TargetFolder(*DL));
  InstExpander TheExpander(this, &TheBuilder);
  Preprocessor ThePreprocessor(this, &TheBuilder);
  IRB = &TheBuilder;
  Expander = &TheExpander;

  TheContext = &F.getContext();
  TheModule = F.getParent();
  TheFunction = &F;

  // Drop whatever a previous function left behind.
  ValueMap.clear();
  Arg64Casts.clear();
  DeadInsts.clear();
  Int64Insts.clear();

  // Every stage runs unconditionally, in this order.
  bool Changed = ThePreprocessor.preprocess(F);
  Changed |= expandArguments(F);
  Changed |= preparePHIs(F);
  Changed |= expandInsts(F);
  Changed |= populatePHIs(F);
  Changed |= removeDeadInsts();
  return Changed;
}
/// Returns the (Lo, Hi) i32 pair for the 64-bit value V.
///
/// Values already recorded in the map are returned as-is. Otherwise V must be
/// a constant integer, an undef value or a constant expression; its halves
/// are created on demand, cached and returned. Any other unmapped value hits
/// the unreachable assertion at the end.
ValuePair Emu64Ops::getExpandedValues(Value *V) {
  auto Lookup = ValueMap.insert(std::make_pair(V, ValuePair()));
  // Nothing is inserted into the map below, so this reference stays valid.
  ValuePair &Slot = Lookup.first->second;
  if (!Lookup.second)
    return Slot;

  // Splits a 64-bit value with trunc / lshr-by-32 + trunc.
  auto SplitByShift = [this](Value *Wide) {
    Value *Lo = IRB->CreateTrunc(Wide, IRB->getInt32Ty());
    Value *Hi = IRB->CreateTrunc(IRB->CreateLShr(Wide, 32), IRB->getInt32Ty());
    return ValuePair(Lo, Hi);
  };

  if (isa<ConstantInt>(V)) {
    Slot = SplitByShift(V);
    return Slot;
  }

  if (isa<UndefValue>(V)) {
    // Both halves of an undef value are undef.
    Value *Lo = UndefValue::get(IRB->getInt32Ty());
    Value *Hi = UndefValue::get(IRB->getInt32Ty());
    Slot = ValuePair(Lo, Hi);
    return Slot;
  }

  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
    if (isa<PtrToIntOperator>(CE)) {
      // A ptrtoint constant expression is split by reinterpreting it as
      // <2 x i32> and extracting both lanes.
      Value *Vec = IRB->CreateBitCast(V, getV2Int32Ty());
      Value *Lo = IRB->CreateExtractElement(Vec, IRB->getInt32(0));
      Value *Hi = IRB->CreateExtractElement(Vec, IRB->getInt32(1));
      Slot = ValuePair(Lo, Hi);
    } else {
      Slot = SplitByShift(V);
    }
    return Slot;
  }

  IGC_ASSERT_UNREACHABLE(); // TODO: NOT IMPLEMENTED!
}
/// Records (Lo, Hi) as the expanded form of V. Uses map insertion, so an
/// entry that already exists for V is left unchanged.
void Emu64Ops::setExpandedValues(Value *V, Value *Lo, Value *Hi) {
  ValueMap.insert(std::make_pair(V, ValuePair(Lo, Hi)));
}
/// Splits every i64 function argument into its (Lo, Hi) halves at the top of
/// the entry block.
/// \returns true if at least one argument was split.
bool Emu64Ops::expandArguments(Function &F) {
  // All helper instructions go in front of the first entry-block instruction.
  IRB->SetInsertPoint(&F.getEntryBlock().front());

  bool Changed = false;
  for (auto &Arg : F.args()) {
    if (isInt64(&Arg)) {
      // Reinterpret the argument as <2 x i32> and pull out both lanes.
      Value *AsVec = IRB->CreateBitCast(&Arg, getV2Int32Ty());
      Value *LoHalf = IRB->CreateExtractElement(AsVec, IRB->getInt32(0));
      Value *HiHalf = IRB->CreateExtractElement(AsVec, IRB->getInt32(1));
      setExpandedValues(&Arg, LoHalf, HiHalf);
      // Remember the bitcast; the argument type itself cannot be replaced.
      Arg64Casts.insert(cast<BitCastInst>(AsVec));
      Changed = true;
    }
  }
  return Changed;
}
/// Creates an empty pair of i32 PHI nodes for every i64 PHI in F and marks
/// the original PHI as dead. The incoming values are filled in later by
/// populatePHIs(), once all instructions have been expanded.
/// \returns true if at least one PHI was prepared.
bool Emu64Ops::preparePHIs(Function &F) {
  bool Changed = false;
  for (auto &Block : F) {
    // Walk the leading run of PHI nodes of the block.
    for (auto It = Block.begin();; ++It) {
      PHINode *Phi = dyn_cast<PHINode>(It);
      if (Phi == nullptr)
        break;
      if (!isInt64(Phi))
        continue;

      // The new PHIs are inserted in front of the original one.
      IRB->SetInsertPoint(Phi);
      unsigned NumIncoming = Phi->getNumIncomingValues();
      Value *LoPhi = IRB->CreatePHI(IRB->getInt32Ty(), NumIncoming);
      Value *HiPhi = IRB->CreatePHI(IRB->getInt32Ty(), NumIncoming);
      setExpandedValues(Phi, LoPhi, HiPhi);
      DeadInsts.insert(Phi);
      Changed = true;
    }
  }
  return Changed;
}
// Expands every instruction of F, visiting basic blocks in reverse
// post-order. Returns true if at least one instruction was expanded.
bool Emu64Ops::expandInsts(Function &F) {
ReversePostOrderTraversal<Function *> RPOT(&F);
bool Changed = false;
for (auto &BB : RPOT) {
for (auto BI = BB->begin(), BE = BB->end(); BI != BE; /*EMPTY*/) {
// Advance the iterator before expanding, since expansion may insert new
// instructions around I.
Instruction *I = &(*BI++);
doNotDeleteInstr = false;
bool LocalChanged = Expander->expand(I);
Changed |= LocalChanged;
if (LocalChanged) {
// Expansion may have split the block (the shift expanders do), moving I
// into a new block. Resume right after I in whatever block it lives in now.
BI = std::next(BasicBlock::iterator(I));
BE = I->getParent()->end();
// The original instruction is erased later by removeDeadInsts().
DeadInsts.insert(I);
}
// NOTE(review): doNotDeleteInstr is set by code outside this function; when
// it is set only the end iterator is refreshed here. I is still queued in
// DeadInsts above if the expander returned true - confirm that is intended.
if (doNotDeleteInstr)
BE = I->getParent()->end();
}
}
return Changed;
}
/// Fills in the incoming values of the i32 PHI pairs created by
/// preparePHIs(), using the expanded halves of each original incoming value.
/// \returns true if at least one i64 PHI was processed.
bool Emu64Ops::populatePHIs(Function &F) {
  bool Changed = false;
  for (auto &Block : F) {
    // Walk the leading run of PHI nodes of the block.
    for (auto It = Block.begin();; ++It) {
      PHINode *Phi = dyn_cast<PHINode>(It);
      if (Phi == nullptr)
        break;
      if (!isInt64(Phi))
        continue;

      // The expanded values of an i64 PHI are the two i32 PHIs.
      ValuePair Halves = getExpandedValues(Phi);
      PHINode *LoPhi = cast<PHINode>(Halves.first);
      PHINode *HiPhi = cast<PHINode>(Halves.second);

      for (auto &Incoming : Phi->operands()) {
        BasicBlock *Pred = Phi->getIncomingBlock(Incoming);
        ValuePair In = getExpandedValues(Incoming.get());
        LoPhi->addIncoming(In.first, Pred);
        HiPhi->addIncoming(In.second, Pred);
      }
      Changed = true;
    }
  }
  return Changed;
}
bool Emu64Ops::removeDeadInsts() {
bool Changed = false;
for (auto *I : DeadInsts) {
Type *Ty = I->getType();
if (!Ty->isVoidTy())
I->replaceAllUsesWith(UndefValue::get(Ty));
I->eraseFromParent();
Changed = true;
}
return Changed;
}
/// Tells whether the given LLVM opcode is treated as natively supported on
/// 64-bit integers: the three shifts always, add/sub only when the platform
/// reports 64-bit add support.
bool Emu64Ops::hasOpcodeInt64Support(unsigned int opcode) const {
  // PVC-B0 only
  switch (opcode) {
  case llvm::Instruction::Shl:
  case llvm::Instruction::LShr:
  case llvm::Instruction::AShr:
    return true;
  case llvm::Instruction::Add:
  case llvm::Instruction::Sub:
    return CGC->platform.hasInt64Add();
  default:
    return false;
  }
}
/// Expands a single instruction. New instructions are inserted in front of I.
/// \returns true if the matching visit handler expanded I.
bool InstExpander::expand(Instruction *I) {
  IRB->SetInsertPoint(I);
  const bool Expanded = visit(*I);
  return Expanded;
}
// Fallback handler for instruction kinds that have no dedicated visit
// method: asserts and reports the instruction as not expanded.
bool InstExpander::visitInstruction(Instruction &I) {
IGC_ASSERT_MESSAGE(0, "UNKNOWN INSTRUCTION is BEING EXPANDED!");
return false;
}
/// Rewrites `ret i64 %v` so that the returned value is rebuilt from the
/// expanded halves of %v. Returns that are void or non-i64 are left alone.
/// \returns true if a replacement return was emitted.
bool InstExpander::visitRet(ReturnInst &RI) {
  Value *RetVal = RI.getReturnValue();
  if (!RetVal)
    return false;

  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(RetVal))
    return false;

  // Pack (Lo, Hi) into <2 x i32> and reinterpret the vector as i64.
  ValuePair Halves = Emu->getExpandedValues(RetVal);
  Type *VecTy = Emu->getV2Int32Ty();
  Value *Packed = UndefValue::get(VecTy);
  Packed = IRB->CreateInsertElement(Packed, Halves.first, IRB->getInt32(0));
  Packed = IRB->CreateInsertElement(Packed, Halves.second, IRB->getInt32(1));
  Packed = IRB->CreateBitCast(Packed, IRB->getInt64Ty());

  // Emit the new return in front of the old one; the caller queues the old
  // one for deletion.
  IRB->SetInsertPoint(&RI);
  (void)IRB->CreateRet(Packed);
  return true;
}
/// Expands `freeze i64` into two independent freezes of the operand's halves.
/// \returns true if the instruction was expanded.
bool InstExpander::visitFreeze(FreezeInst &FI) {
  IGC_ASSERT(nullptr != Emu);
  // Only 64-bit integer freezes are of interest.
  if (Emu->isInt64(&FI)) {
    Value *Operand = FI.getOperand(0);
    IGC_ASSERT(nullptr != Src);
    ValuePair Halves = Emu->getExpandedValues(Operand);
    Value *FrozenLo = IRB->CreateFreeze(Halves.first);
    Value *FrozenHi = IRB->CreateFreeze(Halves.second);
    Emu->setExpandedValues(&FI, FrozenLo, FrozenHi);
    return true;
  }
  return false;
}
/// Expands a 64-bit `add` into a call to the GenISA add_pair intrinsic, which
/// takes (Lo0, Hi0, Lo1, Hi1) and yields the result's {Lo, Hi}. The
/// instruction is left alone when the platform has 64-bit add and
/// hasHWSupport() accepts it.
/// \returns true if the instruction was expanded.
bool InstExpander::visitAdd(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;
  if (Emu->CGC->platform.hasInt64Add() && hasHWSupport(BinOp))
    return false;

  ValuePair LHS = Emu->getExpandedValues(BinOp.getOperand(0));
  ValuePair RHS = Emu->getExpandedValues(BinOp.getOperand(1));

  Function *AddPair = GenISAIntrinsic::getDeclaration(Emu->getModule(), GenISAIntrinsic::GenISA_add_pair);
  IGC_ASSERT(nullptr != IRB);
  Value *Result = IRB->CreateCall4(AddPair, LHS.first, LHS.second, RHS.first, RHS.second);
  Value *ResLo = IRB->CreateExtractValue(Result, 0);
  Value *ResHi = IRB->CreateExtractValue(Result, 1);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
/// Expands a 64-bit `sub` into a call to the GenISA sub_pair intrinsic, which
/// takes (Lo0, Hi0, Lo1, Hi1) and yields the result's {Lo, Hi}. The
/// instruction is left alone when the platform has 64-bit add and
/// hasHWSupport() accepts it.
/// \returns true if the instruction was expanded.
bool InstExpander::visitSub(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;
  if (Emu->CGC->platform.hasInt64Add() && hasHWSupport(BinOp))
    return false;

  ValuePair LHS = Emu->getExpandedValues(BinOp.getOperand(0));
  ValuePair RHS = Emu->getExpandedValues(BinOp.getOperand(1));

  Function *SubPair = GenISAIntrinsic::getDeclaration(Emu->getModule(), GenISAIntrinsic::GenISA_sub_pair);
  IGC_ASSERT(nullptr != IRB);
  Value *Result = IRB->CreateCall4(SubPair, LHS.first, LHS.second, RHS.first, RHS.second);
  Value *ResLo = IRB->CreateExtractValue(Result, 0);
  Value *ResHi = IRB->CreateExtractValue(Result, 1);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
/// Expands a 64-bit `mul` into a call to the GenISA mul_pair intrinsic, which
/// takes (Lo0, Hi0, Lo1, Hi1) and yields the result's {Lo, Hi}.
/// \returns true if the instruction was expanded.
bool InstExpander::visitMul(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;

  ValuePair LHS = Emu->getExpandedValues(BinOp.getOperand(0));
  ValuePair RHS = Emu->getExpandedValues(BinOp.getOperand(1));

  Function *MulPair = GenISAIntrinsic::getDeclaration(Emu->getModule(), GenISAIntrinsic::GenISA_mul_pair);
  IGC_ASSERT(nullptr != IRB);
  Value *Result = IRB->CreateCall4(MulPair, LHS.first, LHS.second, RHS.first, RHS.second);
  Value *ResLo = IRB->CreateExtractValue(Result, 0);
  Value *ResHi = IRB->CreateExtractValue(Result, 1);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// A 64-bit `sdiv` is not expected to reach this pass (the assert message says
// it is emulated by a library call), so this handler only asserts that and
// never expands anything.
bool InstExpander::visitSDiv(BinaryOperator &BinOp) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT_MESSAGE(false == Emu->isInt64(&BinOp),
"There should not be `sdiv` which is already emulated by library call.");
return false;
}
// A 64-bit `udiv` is not expected to reach this pass (the assert message says
// it is emulated by a library call), so this handler only asserts that and
// never expands anything.
bool InstExpander::visitUDiv(BinaryOperator &BinOp) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT_MESSAGE(false == Emu->isInt64(&BinOp),
"There should not be `udiv` which is already emulated by library call.");
return false;
}
// A 64-bit `srem` is not expected to reach this pass (the assert message says
// it is emulated by a library call), so this handler only asserts that and
// never expands anything.
bool InstExpander::visitSRem(BinaryOperator &BinOp) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT_MESSAGE(false == Emu->isInt64(&BinOp),
"There should not be `srem` which is already emulated by library call.");
return false;
}
// A 64-bit `urem` is not expected to reach this pass (the assert message says
// it is emulated by a library call), so this handler only asserts that and
// never expands anything.
bool InstExpander::visitURem(BinaryOperator &BinOp) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT_MESSAGE(false == Emu->isInt64(&BinOp),
"There should not be `urem` which is already emulated by library call.");
return false;
}
/// Expands a 64-bit `shl` into 32-bit operations on the (Lo, Hi) halves.
///
/// Only the low half of the shift-amount operand is used, masked to 6 bits.
///   * Constant amount: exactly one straight-line sequence is emitted. A zero
///     amount simply forwards the source halves.
///   * Variable amount: an outer branch skips the work when the amount is
///     zero, and an inner if/else picks the "< 32" or ">= 32" sequence. The
///     results are merged with PHI nodes. This splits the block that
///     contains BinOp.
///
/// \returns true if the instruction was expanded, false if it is not an i64
///          shift or hasHWSupport() accepts it.
bool InstExpander::visitShl(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;
  if (hasHWSupport(BinOp))
    return false;

  auto [Lo, Hi] = Emu->getExpandedValues(BinOp.getOperand(0));
  Value *ShAmt = nullptr;
  std::tie(ShAmt, std::ignore) = Emu->getExpandedValues(BinOp.getOperand(1));

  BasicBlock *OldBB = BinOp.getParent();
  BasicBlock *InnerTBB = nullptr;
  BasicBlock *InnerFBB = nullptr;
  PHINode *InnerResLo = nullptr;
  PHINode *InnerResHi = nullptr;
  Value *ResLo = nullptr;
  Value *ResHi = nullptr;
  DebugLoc BinOpDebugLoc = BinOp.getDebugLoc();
  Value *Cond = nullptr;

  // Only the low 6 bits of ShAmt are used for the shift.
  ShAmt = IRB->CreateAnd(ShAmt, 63);

  // A constant zero amount must not go through the "< 32" sequence below: it
  // would emit `lshr i32 Lo, 32`, an out-of-range shift whose result is
  // poison. Shifting by zero is the identity, so forward the source halves.
  if (auto *ConstAmt = dyn_cast<ConstantInt>(ShAmt)) {
    if (ConstAmt->isZero()) {
      Emu->setExpandedValues(&BinOp, Lo, Hi);
      return true;
    }
  }

  if (!isa<ConstantInt>(ShAmt)) {
    // Create outer if-endif to handle the special case where `ShAmt` is zero.
    // We have option to handle that with `((S >> (~ShAmt)) >> 1)`. However, as
    // a zero `ShAmt` is a very rare case so that the outer branch should be
    // uniform one in most cases.
    Value *NE = IRB->CreateICmpNE(ShAmt, Constant::getNullValue(ShAmt->getType()));
    BasicBlock *JointBB = OldBB->splitBasicBlock(&BinOp);
    ResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".shl.outer.merge.lo", &BinOp);
    cast<Instruction>(ResLo)->setDebugLoc(BinOpDebugLoc);
    ResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".shl.outer.merge.hi", &BinOp);
    cast<Instruction>(ResHi)->setDebugLoc(BinOpDebugLoc);

    BasicBlock *TrueBB = BasicBlock::Create(*Emu->getContext(), ".shl.outer.true.branch");
    TrueBB->insertInto(Emu->getFunction(), JointBB);
    Instruction *TrueJmp = BranchInst::Create(JointBB, TrueBB);
    TrueJmp->setDebugLoc(BinOpDebugLoc);

    // Replace the unconditional branch left by the split with the
    // conditional one on `ShAmt != 0`.
    OldBB->getTerminator()->eraseFromParent();
    Instruction *TempOldBBBranchInst = BranchInst::Create(TrueBB, JointBB, NE, OldBB);
    TempOldBBBranchInst->setDebugLoc(BinOpDebugLoc);

    // Create the inner branch.
    IRB->SetInsertPoint(&(*TrueBB->begin()));
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));

    // Prepare to generate branches to handle the case where `ShAmt` is less
    // than 32 (true branch) or otherwise (false branch).
    BasicBlock *InnerJBB = TrueBB->splitBasicBlock(TrueJmp);
    InnerResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".shl.merge.inner.lo", TrueJmp);
    InnerResLo->setDebugLoc(BinOpDebugLoc);
    InnerResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".shl.merge.inner.hi", TrueJmp);
    InnerResHi->setDebugLoc(BinOpDebugLoc);

    InnerTBB = BasicBlock::Create(*Emu->getContext(), ".shl.inner.true.branch");
    InnerTBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerTBBBranchInst = BranchInst::Create(InnerJBB, InnerTBB);
    TempInnerTBBBranchInst->setDebugLoc(BinOpDebugLoc);

    InnerFBB = BasicBlock::Create(*Emu->getContext(), ".shl.inner.false.branch");
    InnerFBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerFBBBranchInst = BranchInst::Create(InnerJBB, InnerFBB);
    TempInnerFBBBranchInst->setDebugLoc(BinOpDebugLoc);

    TrueBB->getTerminator()->eraseFromParent();
    Instruction *TempTrueBBBranchInst = BranchInst::Create(InnerTBB, InnerFBB, Cond, TrueBB);
    TempTrueBBBranchInst->setDebugLoc(BinOpDebugLoc);

    // The result is the same as the source if ShAmt is 0, i.e. NE is
    // false.
    cast<PHINode>(ResLo)->addIncoming(Lo, OldBB);
    cast<PHINode>(ResHi)->addIncoming(Hi, OldBB);
    // Prepare the result from inner if-else-endif.
    cast<PHINode>(ResLo)->addIncoming(InnerResLo, InnerJBB);
    cast<PHINode>(ResHi)->addIncoming(InnerResHi, InnerJBB);
  } else
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));

  // 0 < ShAmt < 32: bits shifted out of Lo move into the bottom of Hi.
  if (Cond == IRB->getTrue() || InnerTBB) {
    if (InnerTBB)
      IRB->SetInsertPoint(&(*InnerTBB->begin()));

    Value *L = IRB->CreateShl(Lo, ShAmt);
    Value *Amt = IRB->CreateSub(IRB->getInt32(32), ShAmt);
    Value *T0 = IRB->CreateLShr(Lo, Amt);
    Value *H = IRB->CreateShl(Hi, ShAmt);
    H = IRB->CreateOr(H, T0);

    if (InnerTBB) {
      IGC_ASSERT(isa<PHINode>(InnerResLo));
      IGC_ASSERT(isa<PHINode>(InnerResHi));
      InnerResLo->addIncoming(L, InnerTBB);
      InnerResHi->addIncoming(H, InnerTBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }

  // ShAmt >= 32: Lo becomes zero and Hi is Lo shifted by (ShAmt - 32).
  if (Cond == IRB->getFalse() || InnerFBB) {
    if (InnerFBB)
      IRB->SetInsertPoint(&(*InnerFBB->begin()));

    Value *L = IRB->getInt32(0);
    Value *Amt = IRB->CreateSub(ShAmt, IRB->getInt32(32));
    Value *H = IRB->CreateShl(Lo, Amt);

    if (InnerFBB) {
      IGC_ASSERT(isa<PHINode>(InnerResLo));
      IGC_ASSERT(isa<PHINode>(InnerResHi));
      InnerResLo->addIncoming(L, InnerFBB);
      InnerResHi->addIncoming(H, InnerFBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }

  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
/// Expands a 64-bit `lshr` into 32-bit operations on the (Lo, Hi) halves.
///
/// Only the low half of the shift-amount operand is used, masked to 6 bits.
///   * Constant amount: exactly one straight-line sequence is emitted. A zero
///     amount simply forwards the source halves.
///   * Variable amount: an outer branch skips the work when the amount is
///     zero, and an inner if/else picks the "< 32" or ">= 32" sequence. The
///     results are merged with PHI nodes. This splits the block that
///     contains BinOp.
///
/// \returns true if the instruction was expanded, false if it is not an i64
///          shift or hasHWSupport() accepts it.
bool InstExpander::visitLShr(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;
  if (hasHWSupport(BinOp))
    return false;

  auto [Lo, Hi] = Emu->getExpandedValues(BinOp.getOperand(0));
  Value *ShAmt = nullptr;
  std::tie(ShAmt, std::ignore) = Emu->getExpandedValues(BinOp.getOperand(1));

  BasicBlock *OldBB = BinOp.getParent();
  BasicBlock *InnerTBB = nullptr;
  BasicBlock *InnerFBB = nullptr;
  PHINode *InnerResLo = nullptr;
  PHINode *InnerResHi = nullptr;
  Value *ResLo = nullptr;
  Value *ResHi = nullptr;
  DebugLoc BinOpDebugLoc = BinOp.getDebugLoc();
  Value *Cond = nullptr;

  // Only the low 6 bits of ShAmt are used for the shift.
  ShAmt = IRB->CreateAnd(ShAmt, 63);

  // A constant zero amount must not go through the "< 32" sequence below: it
  // would emit `shl i32 Hi, 32`, an out-of-range shift whose result is
  // poison. Shifting by zero is the identity, so forward the source halves.
  if (auto *ConstAmt = dyn_cast<ConstantInt>(ShAmt)) {
    if (ConstAmt->isZero()) {
      Emu->setExpandedValues(&BinOp, Lo, Hi);
      return true;
    }
  }

  if (!isa<ConstantInt>(ShAmt)) {
    // Create outer if-endif to handle the special case where `ShAmt` is zero.
    // We have option to handle that with `((S >> (~ShAmt)) >> 1)`. However, as
    // a zero `ShAmt` is a very rare case so that the outer branch should be
    // uniform one in most cases.
    Value *NE = IRB->CreateICmpNE(ShAmt, Constant::getNullValue(ShAmt->getType()));
    BasicBlock *JointBB = OldBB->splitBasicBlock(&BinOp);
    ResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".lshr.outer.merge.lo", &BinOp);
    cast<Instruction>(ResLo)->setDebugLoc(BinOpDebugLoc);
    ResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".lshr.outer.merge.hi", &BinOp);
    cast<Instruction>(ResHi)->setDebugLoc(BinOpDebugLoc);

    BasicBlock *TrueBB = BasicBlock::Create(*Emu->getContext(), ".lshr.outer.true.branch");
    TrueBB->insertInto(Emu->getFunction(), JointBB);
    Instruction *TrueJmp = BranchInst::Create(JointBB, TrueBB);
    TrueJmp->setDebugLoc(BinOpDebugLoc);

    // Replace the unconditional branch left by the split with the
    // conditional one on `ShAmt != 0`.
    OldBB->getTerminator()->eraseFromParent();
    Instruction *TempOldBBBranchInst = BranchInst::Create(TrueBB, JointBB, NE, OldBB);
    TempOldBBBranchInst->setDebugLoc(BinOpDebugLoc);

    // Create the inner branch.
    IRB->SetInsertPoint(&(*TrueBB->begin()));
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));

    // Prepare to generate branches to handle the case where `ShAmt` is less
    // than 32 (true branch) or otherwise (false branch).
    BasicBlock *InnerJBB = TrueBB->splitBasicBlock(TrueJmp);
    InnerResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".lshr.merge.inner.lo", TrueJmp);
    InnerResLo->setDebugLoc(BinOpDebugLoc);
    InnerResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".lshr.merge.inner.hi", TrueJmp);
    InnerResHi->setDebugLoc(BinOpDebugLoc);

    InnerTBB = BasicBlock::Create(*Emu->getContext(), ".lshr.inner.true.branch");
    InnerTBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerTBBBranchInst = BranchInst::Create(InnerJBB, InnerTBB);
    TempInnerTBBBranchInst->setDebugLoc(BinOpDebugLoc);

    InnerFBB = BasicBlock::Create(*Emu->getContext(), ".lshr.inner.false.branch");
    InnerFBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerFBBBranchInst = BranchInst::Create(InnerJBB, InnerFBB);
    TempInnerFBBBranchInst->setDebugLoc(BinOpDebugLoc);

    TrueBB->getTerminator()->eraseFromParent();
    Instruction *TempTrueBBBranchInst = BranchInst::Create(InnerTBB, InnerFBB, Cond, TrueBB);
    TempTrueBBBranchInst->setDebugLoc(BinOpDebugLoc);

    // The result is the same as the source if ShAmt is 0, i.e. NE is
    // false.
    cast<PHINode>(ResLo)->addIncoming(Lo, OldBB);
    cast<PHINode>(ResHi)->addIncoming(Hi, OldBB);
    // Prepare the result from inner if-else-endif.
    cast<PHINode>(ResLo)->addIncoming(InnerResLo, InnerJBB);
    cast<PHINode>(ResHi)->addIncoming(InnerResHi, InnerJBB);
  } else
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));

  // 0 < ShAmt < 32: bits shifted out of Hi move into the top of Lo.
  if (Cond == IRB->getTrue() || InnerTBB) {
    if (InnerTBB)
      IRB->SetInsertPoint(&(*InnerTBB->begin()));

    Value *H = IRB->CreateLShr(Hi, ShAmt);
    // Avoid using IRB->CreateNeg(ShAmt) as the shift amount, because (-ShAmt)
    // would be interpreted as a number bigger than 32. LLVM might figure this
    // out and thus make the shift result undefined!
    Value *Amt = IRB->CreateSub(IRB->getInt32(32), ShAmt);
    Value *T0 = IRB->CreateShl(Hi, Amt);
    Value *L = IRB->CreateLShr(Lo, ShAmt);
    L = IRB->CreateOr(L, T0);

    if (InnerTBB) {
      InnerResLo->addIncoming(L, InnerTBB);
      InnerResHi->addIncoming(H, InnerTBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }

  // ShAmt >= 32: Hi becomes zero and Lo is Hi shifted by (ShAmt - 32).
  if (Cond == IRB->getFalse() || InnerFBB) {
    if (InnerFBB)
      IRB->SetInsertPoint(&(*InnerFBB->begin()));

    Value *H = IRB->getInt32(0);
    // Avoid using ShAmt as shift amount as it is bigger than 32. LLVM might figure out
    // this shift amount is bigger than 32 and thus makes the shift result undefined.
    Value *Amt = IRB->CreateSub(ShAmt, IRB->getInt32(32));
    Value *L = IRB->CreateLShr(Hi, Amt);

    if (InnerFBB) {
      InnerResLo->addIncoming(L, InnerFBB);
      InnerResHi->addIncoming(H, InnerFBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }

  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// Expands a 64-bit `ashr` into operations on the 32-bit Lo/Hi halves.
//
// The shift amount is masked to its low 6 bits. Three shapes are produced:
//   * constant amount == 0: the source halves are forwarded unchanged;
//   * other constant amount: straight-line code for either the "< 32" or the
//     ">= 32" case, selected by the folded comparison;
//   * non-constant amount: an outer if-endif that skips the work when the
//     amount is zero, wrapping an inner if-else-endif that distinguishes
//     "< 32" from ">= 32". The results are merged through PHI nodes.
//
// Returns false when BinOp is not an i64 operation or when hasHWSupport()
// reports native support; returns true after the expanded Lo/Hi pair has been
// recorded for BinOp.
bool InstExpander::visitAShr(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;
  if (hasHWSupport(BinOp))
    return false;
  auto [Lo, Hi] = Emu->getExpandedValues(BinOp.getOperand(0));
  Value *ShAmt = nullptr;
  std::tie(ShAmt, std::ignore) = Emu->getExpandedValues(BinOp.getOperand(1));
  BasicBlock *OldBB = BinOp.getParent();
  BasicBlock *InnerTBB = nullptr;
  BasicBlock *InnerFBB = nullptr;
  PHINode *InnerResLo = nullptr;
  PHINode *InnerResHi = nullptr;
  Value *ResLo = nullptr;
  Value *ResHi = nullptr;
  DebugLoc BinOpDebugLoc = BinOp.getDebugLoc();
  Value *Cond = nullptr;
  // Only LSB of ShAmt is used for shift
  ShAmt = IRB->CreateAnd(ShAmt, 63);
  // A constant zero shift must not reach the "< 32" code below: it would emit
  // `shl Hi, (32 - 0)`, i.e. a 32-bit shift by 32, whose result is undefined.
  // The non-constant path already special-cases zero with the outer branch.
  if (auto *ConstShAmt = dyn_cast<ConstantInt>(ShAmt)) {
    if (ConstShAmt->isZero()) {
      Emu->setExpandedValues(&BinOp, Lo, Hi);
      return true;
    }
  }
  if (!isa<ConstantInt>(ShAmt)) {
    // Create outer if-endif to handle the special case where `ShAmt` is zero.
    // We have option to handle that with `((S >> (~ShAmt)) >> 1)`. However, as
    // a zero `ShAmt` is a very rare case so that the outer branch should be
    // uniform one in most cases.
    Value *NE = IRB->CreateICmpNE(ShAmt, Constant::getNullValue(ShAmt->getType()));
    BasicBlock *JointBB = OldBB->splitBasicBlock(&BinOp);
    ResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".ashr.outer.merge.lo", &BinOp);
    cast<Instruction>(ResLo)->setDebugLoc(BinOpDebugLoc);
    ResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".ashr.outer.merge.hi", &BinOp);
    cast<Instruction>(ResHi)->setDebugLoc(BinOpDebugLoc);
    BasicBlock *TrueBB = BasicBlock::Create(*Emu->getContext(), ".ashr.outer.true.branch");
    TrueBB->insertInto(Emu->getFunction(), JointBB);
    Instruction *TrueJmp = BranchInst::Create(JointBB, TrueBB);
    TrueJmp->setDebugLoc(BinOpDebugLoc);
    // Replace the unconditional branch left by splitBasicBlock with the
    // conditional one on `ShAmt != 0`.
    OldBB->getTerminator()->eraseFromParent();
    Instruction *TempOldBBBranchInst = BranchInst::Create(TrueBB, JointBB, NE, OldBB);
    TempOldBBBranchInst->setDebugLoc(BinOpDebugLoc);
    // Create the inner branch.
    IRB->SetInsertPoint(&(*TrueBB->begin()));
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));
    // Prepare to generate branches to handle the case where `ShAmt` is less
    // than 32 or otherwise.
    BasicBlock *InnerJBB = TrueBB->splitBasicBlock(TrueJmp);
    InnerResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".ashr.merge.inner.lo", TrueJmp);
    InnerResLo->setDebugLoc(BinOpDebugLoc);
    InnerResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".ashr.merge.inner.hi", TrueJmp);
    InnerResHi->setDebugLoc(BinOpDebugLoc);
    InnerTBB = BasicBlock::Create(*Emu->getContext(), ".ashr.inner.true.branch");
    InnerTBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerTBBBranchInst = BranchInst::Create(InnerJBB, InnerTBB);
    TempInnerTBBBranchInst->setDebugLoc(BinOpDebugLoc);
    InnerFBB = BasicBlock::Create(*Emu->getContext(), ".ashr.inner.false.branch");
    InnerFBB->insertInto(Emu->getFunction(), InnerJBB);
    Instruction *TempInnerFBBBranchInst = BranchInst::Create(InnerJBB, InnerFBB);
    TempInnerFBBBranchInst->setDebugLoc(BinOpDebugLoc);
    TrueBB->getTerminator()->eraseFromParent();
    Instruction *TempTrueBBBranchInst = BranchInst::Create(InnerTBB, InnerFBB, Cond, TrueBB);
    TempTrueBBBranchInst->setDebugLoc(BinOpDebugLoc);
    // The result is the same as the source if ShAmt is 0, i.e. NE is
    // false.
    cast<PHINode>(ResLo)->addIncoming(Lo, OldBB);
    cast<PHINode>(ResHi)->addIncoming(Hi, OldBB);
    // Prepare the result from inner if-else-endif.
    cast<PHINode>(ResLo)->addIncoming(InnerResLo, InnerJBB);
    cast<PHINode>(ResHi)->addIncoming(InnerResHi, InnerJBB);
  } else
    Cond = IRB->CreateICmpULT(ShAmt, IRB->getInt32(32));
  // Case 0 < ShAmt < 32: emitted either into the inner true block (dynamic
  // amount) or inline when the comparison folded to `true`.
  if (Cond == IRB->getTrue() || InnerTBB) {
    if (InnerTBB)
      IRB->SetInsertPoint(&(*InnerTBB->begin()));
    Value *H = IRB->CreateAShr(Hi, ShAmt);
    // Bits shifted out of Hi become the top bits of Lo.
    Value *Amt = IRB->CreateSub(IRB->getInt32(32), ShAmt);
    Value *T0 = IRB->CreateShl(Hi, Amt);
    Value *L = IRB->CreateLShr(Lo, ShAmt);
    L = IRB->CreateOr(L, T0);
    if (InnerTBB) {
      InnerResLo->addIncoming(L, InnerTBB);
      InnerResHi->addIncoming(H, InnerTBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }
  // Case 32 <= ShAmt < 64: emitted either into the inner false block (dynamic
  // amount) or inline when the comparison folded to `false`.
  if (Cond == IRB->getFalse() || InnerFBB) {
    if (InnerFBB)
      IRB->SetInsertPoint(&(*InnerFBB->begin()));
    // Hi is filled with copies of the sign bit.
    Value *H = IRB->CreateAShr(Hi, 31);
    Value *Amt = IRB->CreateSub(ShAmt, IRB->getInt32(32));
    Value *L = IRB->CreateAShr(Hi, Amt);
    if (InnerFBB) {
      InnerResLo->addIncoming(L, InnerFBB);
      InnerResHi->addIncoming(H, InnerFBB);
    } else {
      ResLo = L;
      ResHi = H;
    }
  }
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// Expands a 64-bit `and`: the low and high 32-bit halves are and-ed
// independently. Returns false when BinOp is not an i64 operation.
bool InstExpander::visitAnd(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;

  Value *LhsLo = nullptr, *LhsHi = nullptr;
  Value *RhsLo = nullptr, *RhsHi = nullptr;
  std::tie(LhsLo, LhsHi) = Emu->getExpandedValues(BinOp.getOperand(0));
  std::tie(RhsLo, RhsHi) = Emu->getExpandedValues(BinOp.getOperand(1));

  // Low half first, then high half.
  Value *ResLo = IRB->CreateAnd(LhsLo, RhsLo);
  Value *ResHi = IRB->CreateAnd(LhsHi, RhsHi);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// Expands a 64-bit `or`: the low and high 32-bit halves are or-ed
// independently. Returns false when BinOp is not an i64 operation.
bool InstExpander::visitOr(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;

  Value *LhsLo = nullptr, *LhsHi = nullptr;
  Value *RhsLo = nullptr, *RhsHi = nullptr;
  std::tie(LhsLo, LhsHi) = Emu->getExpandedValues(BinOp.getOperand(0));
  std::tie(RhsLo, RhsHi) = Emu->getExpandedValues(BinOp.getOperand(1));

  // Low half first, then high half.
  Value *ResLo = IRB->CreateOr(LhsLo, RhsLo);
  Value *ResHi = IRB->CreateOr(LhsHi, RhsHi);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// Expands a 64-bit `xor`: the low and high 32-bit halves are xor-ed
// independently. Returns false when BinOp is not an i64 operation.
bool InstExpander::visitXor(BinaryOperator &BinOp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&BinOp))
    return false;

  Value *LhsLo = nullptr, *LhsHi = nullptr;
  Value *RhsLo = nullptr, *RhsHi = nullptr;
  std::tie(LhsLo, LhsHi) = Emu->getExpandedValues(BinOp.getOperand(0));
  std::tie(RhsLo, RhsHi) = Emu->getExpandedValues(BinOp.getOperand(1));

  // Low half first, then high half.
  Value *ResLo = IRB->CreateXor(LhsLo, RhsLo);
  Value *ResHi = IRB->CreateXor(LhsHi, RhsHi);
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  return true;
}
// Expands an i64 load into a <2 x i32> load through a pointer cast in the same
// address space; element 0 becomes the low half and element 1 the high half.
// Returns false when the loaded value is not i64.
bool InstExpander::visitLoad(LoadInst &LD) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&LD))
    return false;

  Type *PairTy = Emu->getV2Int32Ty();
  Value *SrcPtr = LD.getPointerOperand();
  const unsigned AddrSpace = cast<PointerType>(SrcPtr->getType())->getAddressSpace();

  // Reinterpret the original pointer as a pointer to the vector pair.
  Value *PairPtr = IRB->CreatePointerCast(SrcPtr, PairTy->getPointerTo(AddrSpace));
  LoadInst *PairLoad = IRB->CreateLoad(PairTy, PairPtr);
  // Carry the memory attributes of the original load over to the new one.
  Emu->dupMemoryAttribute(PairLoad, &LD, 0);

  Value *LowHalf = IRB->CreateExtractElement(PairLoad, IRB->getInt32(0));
  Value *HighHalf = IRB->CreateExtractElement(PairLoad, IRB->getInt32(1));
  Emu->setExpandedValues(&LD, LowHalf, HighHalf);
  return true;
}
// Expands an i64 store: the expanded halves are packed into a <2 x i32> vector
// (low half in element 0, high half in element 1) which is stored through a
// pointer cast in the same address space. Returns false when the stored value
// is not i64.
bool InstExpander::visitStore(StoreInst &ST) {
  Value *Stored = ST.getValueOperand();
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(Stored))
    return false;

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(Stored);

  // Build the vector pair first, then the pointer cast, then the store.
  Type *PairTy = Emu->getV2Int32Ty();
  Value *Packed = UndefValue::get(PairTy);
  Packed = IRB->CreateInsertElement(Packed, LowHalf, IRB->getInt32(0));
  Packed = IRB->CreateInsertElement(Packed, HighHalf, IRB->getInt32(1));

  Value *DstPtr = ST.getPointerOperand();
  const unsigned AddrSpace = cast<PointerType>(DstPtr->getType())->getAddressSpace();
  Value *PairPtr = IRB->CreatePointerCast(DstPtr, PairTy->getPointerTo(AddrSpace));

  StoreInst *PairStore = IRB->CreateStore(Packed, PairPtr);
  // Carry the memory attributes of the original store over to the new one.
  Emu->dupMemoryAttribute(PairStore, &ST, 0);
  return true;
}
// 64-bit `cmpxchg` is not emulated: an i64 compare operand only trips the
// assertion below. Always returns false (nothing was expanded).
bool InstExpander::visitAtomicCmpXchg(AtomicCmpXchgInst &ACXI) {
  Value *Expected = ACXI.getCompareOperand();
  IGC_ASSERT(nullptr != Expected);
  IGC_ASSERT(nullptr != Emu);
  IGC_ASSERT_MESSAGE(!Emu->isInt64(Expected), "TODO: NOT IMPLEMENTED YET!");
  return false;
}
// 64-bit `atomicrmw` is not emulated: an i64 result only trips the assertion
// below. Always returns false (nothing was expanded).
bool InstExpander::visitAtomicRMW(AtomicRMWInst &RMW) {
  IGC_ASSERT(nullptr != Emu);
  IGC_ASSERT_MESSAGE(!Emu->isInt64(&RMW), "TODO: NOT IMPLEMENTED YET!");
  return false;
}
// Expands a truncation whose source is i64: only the low 32-bit half of the
// source matters. It is truncated further when the destination is narrower
// than that half, and then replaces all uses of TI. Returns false when the
// source is not i64.
bool InstExpander::visitTrunc(TruncInst &TI) {
  Value *Wide = TI.getOperand(0);
  IGC_ASSERT(nullptr != Emu);
  IGC_ASSERT(nullptr != Wide);
  if (!Emu->isInt64(Wide))
    return false;

  // The high half is irrelevant for a truncation.
  Value *LowHalf = std::get<0>(Emu->getExpandedValues(Wide));
  IGC_ASSERT(nullptr != LowHalf);
  IGC_ASSERT(nullptr != LowHalf->getType());
  IGC_ASSERT(nullptr != TI.getType());

  const unsigned LowBits = LowHalf->getType()->getScalarSizeInBits();
  const unsigned DstBits = TI.getType()->getScalarSizeInBits();
  IGC_ASSERT(LowBits >= DstBits);

  Value *Replacement = LowHalf;
  if (LowBits != DstBits)
    Replacement = IRB->CreateTrunc(LowHalf, TI.getType());
  TI.replaceAllUsesWith(Replacement);
  return true;
}
// Expands a sign extension to i64: the low half is the source sign-extended to
// 32 bits (or the source itself when it already is 32 bits wide) and the high
// half replicates the sign bit of the low half. Returns false when SEI is not
// i64 or when needsLoHiSplitting() decides the instruction is kept as is.
bool InstExpander::visitSExt(SExtInst &SEI) {
  IGC_ASSERT(nullptr != Emu);
  // Short-circuit order matters: needsLoHiSplitting() is only consulted for
  // i64 results.
  if (!Emu->isInt64(&SEI) || !needsLoHiSplitting(SEI))
    return false;

  Value *Narrow = SEI.getOperand(0);
  Type *NarrowTy = SEI.getSrcTy();
  IGC_ASSERT(nullptr != NarrowTy);
  const unsigned NarrowBits = NarrowTy->getIntegerBitWidth();
  IGC_ASSERT(NarrowBits <= 32);

  Value *LowHalf = (NarrowBits < 32) ? IRB->CreateSExt(Narrow, IRB->getInt32Ty()) : Narrow;
  // Arithmetic shift by 31 yields all-zeros or all-ones depending on the sign.
  Value *HighHalf = IRB->CreateAShr(LowHalf, IRB->getInt32(31));
  Emu->setExpandedValues(&SEI, LowHalf, HighHalf);
  return true;
}
// Expands a zero extension to i64: the low half is the source zero-extended to
// 32 bits (or the source itself when it already is 32 bits wide) and the high
// half is the constant 0. Returns false when ZEI is not i64 or when
// needsLoHiSplitting() decides the instruction is kept as is.
bool InstExpander::visitZExt(ZExtInst &ZEI) {
  IGC_ASSERT(nullptr != Emu);
  // Short-circuit order matters: needsLoHiSplitting() is only consulted for
  // i64 results.
  if (!Emu->isInt64(&ZEI) || !needsLoHiSplitting(ZEI))
    return false;

  Value *Narrow = ZEI.getOperand(0);
  Type *NarrowTy = ZEI.getSrcTy();
  IGC_ASSERT(nullptr != NarrowTy);
  const unsigned NarrowBits = NarrowTy->getIntegerBitWidth();
  IGC_ASSERT(NarrowBits <= 32);

  Value *LowHalf = (NarrowBits < 32) ? IRB->CreateZExt(Narrow, IRB->getInt32Ty()) : Narrow;
  Value *HighHalf = IRB->getInt32(0);
  Emu->setExpandedValues(&ZEI, LowHalf, HighHalf);
  return true;
}
// Expands `fptoui` with an i64 result.
//
// A half source is converted straight to a 32-bit integer with a zero high
// half. Otherwise the value is split as
//   Hi = (uint32)(Src * 2^-32)
//   Lo = (uint32)fma((fp)Hi, -2^32, Src)
// Returns false when the result is not i64.
bool InstExpander::visitFPToUI(FPToUIInst &F2U) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&F2U))
    return false;

  Value *Input = F2U.getOperand(0);
  IGC_ASSERT(nullptr != Input);
  Type *InputTy = Input->getType();
  IGC_ASSERT(nullptr != InputTy);

  if (InputTy->isHalfTy()) {
    // Convert half directly into 32-bit integer.
    Value *LowHalf = IRB->CreateFPToUI(Input, IRB->getInt32Ty());
    Value *HighHalf = Constant::getNullValue(IRB->getInt32Ty());
    Emu->setExpandedValues(&F2U, LowHalf, HighHalf);
    return true;
  }

  // Both declarations are requested regardless of the source type, trunc
  // first and fma second.
  const IGCLLVM::Intrinsic TruncID = Intrinsic::trunc;
  Function *TruncFn = Intrinsic::getDeclaration(Emu->getModule(), TruncID, InputTy);
  const IGCLLVM::Intrinsic FmaID = Intrinsic::fma;
  Function *FmaFn = Intrinsic::getDeclaration(Emu->getModule(), FmaID, InputTy);

  Value *Inv2Pow32 = ConstantFP::get(InputTy, ldexp(1., -32));
  Value *Neg2Pow32 = ConstantFP::get(InputTy, ldexp(-1., 32));

  // Src * 2^-32 is the first value produced on either path.
  Value *Scaled = IRB->CreateFMul(Input, Inv2Pow32);
  Value *HighHalf = nullptr;
  Value *HighAsFP = nullptr;
  if (InputTy->isDoubleTy()) {
    // HW doesn't support rounding on double but does support it on float,
    // We have to build our own based on integer conversion.
    HighHalf = IRB->CreateFPToUI(Scaled, IRB->getInt32Ty());
    HighAsFP = IRB->CreateUIToFP(HighHalf, InputTy);
  } else {
    HighAsFP = IRB->CreateCall(TruncFn, Scaled);
    HighHalf = IRB->CreateFPToUI(HighAsFP, IRB->getInt32Ty());
  }

  // Remainder once the high part has been removed from the source.
  Value *LowAsFP = IRB->CreateCall3(FmaFn, HighAsFP, Neg2Pow32, Input);
  Value *LowHalf = IRB->CreateFPToUI(LowAsFP, IRB->getInt32Ty());
  Emu->setExpandedValues(&F2U, LowHalf, HighHalf);
  return true;
}
// Expands `fptosi` with an i64 result.
//
// A half source is converted straight to a 32-bit integer whose sign is
// replicated into the high half. Otherwise the sign mask is taken from the
// source bits, |Src| is split into Hi/Lo the same way as for `fptoui`, and the
// sign is re-applied as (x ^ s) - s using GenISA_sub_pair.
// Returns false when the result is not i64.
bool InstExpander::visitFPToSI(FPToSIInst &F2S) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&F2S))
    return false;

  Value *Input = F2S.getOperand(0);
  IGC_ASSERT(nullptr != Input);
  Type *InputTy = Input->getType();
  IGC_ASSERT(nullptr != InputTy);

  if (InputTy->isHalfTy()) {
    // Convert half directly into 32-bit integer.
    Value *LowHalf = IRB->CreateFPToSI(Input, IRB->getInt32Ty());
    Value *HighHalf = IRB->CreateAShr(LowHalf, 31);
    Emu->setExpandedValues(&F2S, LowHalf, HighHalf);
    return true;
  }

  // Sign mask: all-ones for a set sign bit, all-zeros otherwise.
  Value *SignMask = nullptr;
  if (InputTy->isDoubleTy()) {
    // The sign bit of a double sits in the upper 32-bit element.
    SignMask = IRB->CreateBitCast(Input, Emu->getV2Int32Ty());
    SignMask = IRB->CreateExtractElement(SignMask, IRB->getInt32(1));
  } else {
    IGC_ASSERT_MESSAGE(InputTy->isFloatTy(), "Unknown float type!");
    SignMask = IRB->CreateBitCast(Input, IRB->getInt32Ty());
  }
  SignMask = IRB->CreateAShr(SignMask, 31);

  // Declarations are requested in the order fabs, trunc, fma.
  const IGCLLVM::Intrinsic FAbsID = Intrinsic::fabs;
  Function *FAbsFn = Intrinsic::getDeclaration(Emu->getModule(), FAbsID, InputTy);
  const IGCLLVM::Intrinsic TruncID = Intrinsic::trunc;
  Function *TruncFn = Intrinsic::getDeclaration(Emu->getModule(), TruncID, InputTy);
  const IGCLLVM::Intrinsic FmaID = Intrinsic::fma;
  Function *FmaFn = Intrinsic::getDeclaration(Emu->getModule(), FmaID, InputTy);

  Value *Inv2Pow32 = ConstantFP::get(InputTy, ldexp(1., -32));
  Value *Neg2Pow32 = ConstantFP::get(InputTy, ldexp(-1., 32));

  // Work on the magnitude from here on.
  Value *Magnitude = IRB->CreateCall(FAbsFn, Input);

  Value *Scaled = IRB->CreateFMul(Magnitude, Inv2Pow32);
  Value *HighHalf = nullptr;
  Value *HighAsFP = nullptr;
  if (InputTy->isDoubleTy()) {
    // HW doesn't support rounding on double but does support it on float,
    // We have to build our own based on integer conversion.
    HighHalf = IRB->CreateFPToUI(Scaled, IRB->getInt32Ty());
    HighAsFP = IRB->CreateUIToFP(HighHalf, InputTy);
  } else {
    HighAsFP = IRB->CreateCall(TruncFn, Scaled);
    HighHalf = IRB->CreateFPToUI(HighAsFP, IRB->getInt32Ty());
  }
  Value *LowAsFP = IRB->CreateCall3(FmaFn, HighAsFP, Neg2Pow32, Magnitude);
  Value *LowHalf = IRB->CreateFPToUI(LowAsFP, IRB->getInt32Ty());

  // Re-apply the sign: (x ^ s) - s on the 64-bit pair.
  LowHalf = IRB->CreateXor(LowHalf, SignMask);
  HighHalf = IRB->CreateXor(HighHalf, SignMask);
  const GenISAIntrinsic::ID SubPairID = GenISAIntrinsic::GenISA_sub_pair;
  Function *SubPairFn = GenISAIntrinsic::getDeclaration(Emu->getModule(), SubPairID);
  Value *Signed = IRB->CreateCall4(SubPairFn, LowHalf, HighHalf, SignMask, SignMask);
  LowHalf = IRB->CreateExtractValue(Signed, 0);
  HighHalf = IRB->CreateExtractValue(Signed, 1);
  Emu->setExpandedValues(&F2S, LowHalf, HighHalf);
  return true;
}
// Converts an unsigned 64-bit integer, given as its 32-bit halves `Lo` and
// `Hi`, into a floating-point value of type `DstTy`.
//
// The block containing `Pos` is split at `Pos` and a small control-flow graph
// is inserted in front of it:
//   * outer branch on ctlz(Hi) != 32: when it is false, `Lo` alone is used;
//   * inner branch on ctlz(Hi) != 0: the 64-bit value is shifted left by
//     ctlz(Hi) so the leading one ends up in the top bit of the high word;
//   * rounding branch: when the (shifted) low word is non-zero, bit 0 of the
//     high word is set so the dropped bits still influence rounding.
// The selected 32-bit word is converted with `uitofp` and multiplied by a
// scale built from the exponent bit pattern (127 + 32 - ShAmt) << 23.
//
// NOTE(review): that bit pattern and the bitcast to the result type match a
// 32-bit float layout; both callers visible in this file pass
// IRB->getFloatTy() — confirm before using another DstTy.
//
// Returns the converted value; the builder is left inserting before `Pos`.
Value *InstExpander::convertUIToFP32(Type *DstTy, Value *Lo, Value *Hi, Instruction *Pos) {
IGC_ASSERT(nullptr != Pos);
IGC_ASSERT(nullptr != IRB);
IGCLLVM::Intrinsic IID;
IID = Intrinsic::ctlz;
Function *Lzd = Intrinsic::getDeclaration(Emu->getModule(), IID, Lo->getType());
// Number of leading zeros of the high word; the `false` argument requests a
// defined result for a zero input.
Value *ShAmt = IRB->CreateCall2(Lzd, Hi, IRB->getFalse());
// Check ShAmt == 32
Value *NE = IRB->CreateICmpNE(ShAmt, IRB->getInt32(32));
BasicBlock *OldBB = Pos->getParent();
IGC_ASSERT(nullptr != OldBB);
BasicBlock *JointBB = OldBB->splitBasicBlock(Pos);
// Set insert point to Pos in the new block JointBB
IRB->SetInsertPoint(Pos);
DebugLoc PosDebugLoc = Pos->getDebugLoc();
// Merge point of the outer branch: the 32-bit word that gets converted.
PHINode *Res = PHINode::Create(IRB->getInt32Ty(), 2, ".u2f.outer.merge", Pos);
Res->setDebugLoc(PosDebugLoc);
{
// The guard restores the insert point (before Pos) when this scope ends.
BuilderType::InsertPointGuard Guard(*IRB);
BasicBlock *TrueBB = BasicBlock::Create(*Emu->getContext(), ".u2f.outer.true.branch");
TrueBB->insertInto(Emu->getFunction(), JointBB);
Instruction *TrueJmp = BranchInst::Create(JointBB, TrueBB);
TrueJmp->setDebugLoc(PosDebugLoc);
// Replace the unconditional branch left by splitBasicBlock with the
// conditional branch on `ShAmt != 32`.
OldBB->getTerminator()->eraseFromParent();
Instruction *TempOldBBBranchInst = BranchInst::Create(TrueBB, JointBB, NE, OldBB);
TempOldBBBranchInst->setDebugLoc(PosDebugLoc);
IRB->SetInsertPoint(&(*TrueBB->begin()));
// Check ShAmt == 0
NE = IRB->CreateICmpNE(ShAmt, Constant::getNullValue(ShAmt->getType()));
BasicBlock *InnerJBB = TrueBB->splitBasicBlock(TrueJmp);
PHINode *InnerResHi = PHINode::Create(IRB->getInt32Ty(), 2, ".u2f.inner.merge.hi", TrueJmp);
InnerResHi->setDebugLoc(PosDebugLoc);
PHINode *InnerResLo = PHINode::Create(IRB->getInt32Ty(), 2, ".u2f.inner.merge.lo", TrueJmp);
InnerResLo->setDebugLoc(PosDebugLoc);
BasicBlock *InnerTBB = BasicBlock::Create(*Emu->getContext(), ".u2f.inner.true.branch");
InnerTBB->insertInto(Emu->getFunction(), InnerJBB);
Instruction *TempInnerTBBBranchInst = BranchInst::Create(InnerJBB, InnerTBB);
TempInnerTBBBranchInst->setDebugLoc(PosDebugLoc);
TrueBB->getTerminator()->eraseFromParent();
Instruction *TempTrueBBBranchInst = BranchInst::Create(InnerTBB, InnerJBB, NE, TrueBB);
TempTrueBBBranchInst->setDebugLoc(PosDebugLoc);
IRB->SetInsertPoint(&(*InnerTBB->begin()));
// 0 < ShAmt < 32
// Shift the 64-bit pair left by ShAmt; the bits leaving the low word are
// or-ed into the high word.
Value *L = IRB->CreateShl(Lo, ShAmt);
Value *H = IRB->CreateShl(Hi, ShAmt);
Value *S = IRB->CreateSub(IRB->getInt32(32), ShAmt);
Value *T0 = IRB->CreateLShr(Lo, S);
H = IRB->CreateOr(H, T0);
// ShAmt == 0: the halves are forwarded unshifted from TrueBB.
InnerResHi->addIncoming(Hi, TrueBB);
InnerResLo->addIncoming(Lo, TrueBB);
InnerResHi->addIncoming(H, InnerTBB);
InnerResLo->addIncoming(L, InnerTBB);
Instruction *InnerJmp = InnerJBB->getTerminator();
IRB->SetInsertPoint(InnerJmp);
// Check InnerResLo != 0 to round correctly.
NE = IRB->CreateICmpNE(InnerResLo, Constant::getNullValue(InnerResLo->getType()));
BasicBlock *RoundingJBB = InnerJBB->splitBasicBlock(InnerJmp);
PHINode *RoundingRes = PHINode::Create(IRB->getInt32Ty(), 2, ".u2f.rounding.merge.hi", InnerJmp);
RoundingRes->setDebugLoc(PosDebugLoc);
BasicBlock *RoundingBB = BasicBlock::Create(*Emu->getContext(), ".u2f.rounding.branch");
RoundingBB->insertInto(Emu->getFunction(), RoundingJBB);
Instruction *TempRoundingBBBranchInst = BranchInst::Create(RoundingJBB, RoundingBB);
TempRoundingBBBranchInst->setDebugLoc(PosDebugLoc);
InnerJBB->getTerminator()->eraseFromParent();
Instruction *TempInnerJBBBranchInst = BranchInst::Create(RoundingBB, RoundingJBB, NE, InnerJBB);
TempInnerJBBBranchInst->setDebugLoc(PosDebugLoc);
// Rounding
IRB->SetInsertPoint(&(*RoundingBB->begin()));
// Set bit 0 of the high word when any low-word bit is set.
Value *Rounded = IRB->CreateOr(InnerResHi, IRB->getInt32(1));
RoundingRes->addIncoming(InnerResHi, InnerJBB);
RoundingRes->addIncoming(Rounded, RoundingBB);
// ShAmt == 32 (outer condition false): only the low word is converted.
Res->addIncoming(Lo, OldBB);
Res->addIncoming(RoundingRes, RoundingJBB);
}
Value *NewVal = IRB->CreateUIToFP(Res, DstTy);
// Adjust
// Build the scale factor from its exponent bit pattern: biased exponent
// (127 + 32 - ShAmt) placed at bit 23, then reinterpreted as the result type.
Value *Exp = IRB->CreateSub(IRB->getInt32(127 + 32), ShAmt);
Exp = IRB->CreateShl(Exp, IRB->getInt32(23));
Exp = IRB->CreateBitCast(Exp, NewVal->getType());
NewVal = IRB->CreateFMul(NewVal, Exp);
return NewVal;
}
// Expands `uitofp` whose source is i64 and replaces all uses of U2F.
//   * constant zero high half: plain 32-bit `uitofp` of the low half;
//   * double destination: fma((double)Hi, 2^32, (double)Lo);
//   * otherwise: convertUIToFP32() to float, followed by an fptrunc when the
//     destination is half.
// Returns false when the source is not i64.
bool InstExpander::visitUIToFP(UIToFPInst &U2F) {
  Value *Wide = U2F.getOperand(0);
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(Wide))
    return false;

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(Wide);
  Type *ResultTy = U2F.getType();

  // Fast path: the value fits in the low half.
  if (auto *HighConst = dyn_cast<ConstantInt>(HighHalf); HighConst && HighConst->isNullValue()) {
    U2F.replaceAllUsesWith(IRB->CreateUIToFP(LowHalf, ResultTy));
    return true;
  }

  Value *Converted = nullptr;
  if (!ResultTy->isDoubleTy()) {
    Converted = convertUIToFP32(IRB->getFloatTy(), LowHalf, HighHalf, &U2F);
    // It's OK to apply the same approach in `convertUIToFP32` to convert 64-bit
    // integer into half. But, it would introduce a little more instructions to
    // properly round the remaining 48 bits into the high 16 bits. Instead, that
    // 64-bit integer is firstly converted into `float` and then converted into
    // `half`.
    if (ResultTy->isHalfTy())
      Converted = IRB->CreateFPTrunc(Converted, ResultTy);
  } else {
    const IGCLLVM::Intrinsic FmaID = Intrinsic::fma;
    Function *FmaFn = Intrinsic::getDeclaration(Emu->getModule(), FmaID, ResultTy);
    Value *TwoPow32 = ConstantFP::get(ResultTy, ldexp(1., 32));
    Value *LowAsFP = IRB->CreateUIToFP(LowHalf, ResultTy);
    Value *HighAsFP = IRB->CreateUIToFP(HighHalf, ResultTy);
    Converted = IRB->CreateCall3(FmaFn, HighAsFP, TwoPow32, LowAsFP);
  }
  U2F.replaceAllUsesWith(Converted);
  return true;
}
// Expands `sitofp` whose source is i64 and replaces all uses of S2F.
//   * constant high half == 0: the value is non-negative and fits in the low
//     half, so a 32-bit `uitofp` of Lo suffices;
//   * constant high half == -1: the value is Lo - 2^32, i.e. negative with
//     magnitude 2^32 - Lo; the magnitude is converted and negated;
//   * double destination: fma((double)(signed)Hi, 2^32, (double)(unsigned)Lo);
//   * otherwise: |x| is computed as (x ^ s) - s, converted through
//     convertUIToFP32(), the sign bit is or-ed back into the float bit
//     pattern, and the result is truncated when the destination is half.
// Returns false when the source is not i64.
bool InstExpander::visitSIToFP(SIToFPInst &S2F) {
  Value *Src = S2F.getOperand(0);
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(Src))
    return false;
  auto [Lo, Hi] = Emu->getExpandedValues(Src);
  Type *DstTy = S2F.getType();
  Value *NewVal = nullptr;
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Hi)) {
    if (CI->isNullValue()) {
      NewVal = IRB->CreateUIToFP(Lo, DstTy);
      S2F.replaceAllUsesWith(NewVal);
      return true;
    }
    if (CI->isMinusOne()) {
      // With Hi == 0xFFFFFFFF the 64-bit value equals Lo - 2^32, so its
      // magnitude is 2^32 - Lo. For Lo != 0 that is the 32-bit two's
      // complement negation of Lo; for Lo == 0 it is 2^32, which does not fit
      // in 32 bits and is supplied as a floating-point constant instead.
      // (Negating uitofp(Lo) directly would turn -1 into -4294967295.)
      Value *Mag = IRB->CreateSub(IRB->getInt32(0), Lo);
      Value *MagF = IRB->CreateUIToFP(Mag, DstTy);
      Value *LoIsZero = IRB->CreateICmpEQ(Lo, IRB->getInt32(0));
      MagF = IRB->CreateSelect(LoIsZero, ConstantFP::get(DstTy, ldexp(1., 32)), MagF);
      NewVal = IRB->CreateFNeg(MagF);
      S2F.replaceAllUsesWith(NewVal);
      return true;
    }
  }
  if (DstTy->isDoubleTy()) {
    IGCLLVM::Intrinsic IID = Intrinsic::fma;
    Function *Fma = Intrinsic::getDeclaration(Emu->getModule(), IID, DstTy);
    Value *FC0 = ConstantFP::get(DstTy, ldexp(1., 32));
    // The low half is an unsigned quantity; only the high half carries the
    // sign.
    Value *LoF = IRB->CreateUIToFP(Lo, DstTy);
    Value *HiF = IRB->CreateSIToFP(Hi, DstTy);
    NewVal = IRB->CreateCall3(Fma, HiF, FC0, LoF);
  } else {
    GenISAIntrinsic::ID GIID = GenISAIntrinsic::GenISA_sub_pair;
    Function *SubPair = GenISAIntrinsic::getDeclaration(Emu->getModule(), GIID);
    // All-ones when negative, all-zeros otherwise.
    Value *Sign = IRB->CreateAShr(Hi, 31);
    // Get abs(x) = (x ^ s) - s;
    Lo = IRB->CreateXor(Lo, Sign);
    Hi = IRB->CreateXor(Hi, Sign);
    Value *V = IRB->CreateCall4(SubPair, Lo, Hi, Sign, Sign);
    Lo = IRB->CreateExtractValue(V, 0);
    Hi = IRB->CreateExtractValue(V, 1);
    NewVal = convertUIToFP32(IRB->getFloatTy(), Lo, Hi, &S2F);
    // Re-apply the sign by setting the top bit of the float bit pattern.
    NewVal = IRB->CreateBitCast(NewVal, IRB->getInt32Ty());
    Sign = IRB->CreateAnd(Sign, IRB->getInt32(0x80000000));
    NewVal = IRB->CreateOr(NewVal, Sign);
    NewVal = IRB->CreateBitCast(NewVal, IRB->getFloatTy());
    if (DstTy->isHalfTy())
      NewVal = IRB->CreateFPTrunc(NewVal, DstTy);
  }
  S2F.replaceAllUsesWith(NewVal);
  return true;
}
// Expands `ptrtoint` with an i64 result through the GenISA_ptr_to_pair
// intrinsic, whose two results become the low and high halves.
// Returns false when the result is not i64.
bool InstExpander::visitPtrToInt(PtrToIntInst &P2I) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&P2I))
    return false;

  Value *Pointer = P2I.getOperand(0);
  const GenISAIntrinsic::ID PtrToPairID = GenISAIntrinsic::GenISA_ptr_to_pair;
  Function *PtrToPairFn = GenISAIntrinsic::getDeclaration(Emu->getModule(), PtrToPairID, Pointer->getType());
  Value *Pair = IRB->CreateCall(PtrToPairFn, Pointer);

  Value *LowHalf = IRB->CreateExtractValue(Pair, 0);
  Value *HighHalf = IRB->CreateExtractValue(Pair, 1);
  Emu->setExpandedValues(&P2I, LowHalf, HighHalf);
  return true;
}
// Expands `inttoptr` whose source is i64: the expanded halves are fed to the
// GenISA_pair_to_ptr intrinsic, the result is cast to the original pointer
// type and replaces all uses of I2P. Returns false when the source is not i64.
bool InstExpander::visitIntToPtr(IntToPtrInst &I2P) {
  Value *Wide = I2P.getOperand(0);
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(Wide))
    return false;

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(Wide);

  const GenISAIntrinsic::ID PairToPtrID = GenISAIntrinsic::GenISA_pair_to_ptr;
  Function *PairToPtrFn = GenISAIntrinsic::getDeclaration(Emu->getModule(), PairToPtrID, I2P.getType());
  Value *Pointer = IRB->CreateCall2(PairToPtrFn, LowHalf, HighHalf);
  Pointer = IRB->CreateBitCast(Pointer, I2P.getType());
  I2P.replaceAllUsesWith(Pointer);
  return true;
}
// Expands bitcasts that involve i64 on either side.
//   * i64 -> i64: the already expanded halves of the source are reused;
//   * other -> i64: the source is cast to <2 x i32> and split into halves;
//   * i64 -> other: the halves are packed into <2 x i32>, cast to the
//     destination type, and all uses of BC are replaced. Casts flagged by
//     isArg64Cast() are left alone.
// Returns false when nothing was expanded.
bool InstExpander::visitBitCast(BitCastInst &BC) {
  Value *From = BC.getOperand(0);
  IGC_ASSERT(nullptr != Emu);

  const bool ResultIsI64 = Emu->isInt64(&BC);
  const bool SourceIsI64 = Emu->isInt64(From);
  if (!ResultIsI64 && !SourceIsI64)
    return false;

  if (ResultIsI64 && SourceIsI64) {
    // Somehow there are still 'bitcast's from i64 to i64. Directly
    // re-use the already expanded values.
    Value *LowHalf = nullptr, *HighHalf = nullptr;
    std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(From);
    Emu->setExpandedValues(&BC, LowHalf, HighHalf);
    return true;
  }

  if (ResultIsI64) {
    Value *AsPair = IRB->CreateBitCast(From, Emu->getV2Int32Ty());
    Value *LowHalf = IRB->CreateExtractElement(AsPair, IRB->getInt32(0));
    Value *HighHalf = IRB->CreateExtractElement(AsPair, IRB->getInt32(1));
    Emu->setExpandedValues(&BC, LowHalf, HighHalf);
    return true;
  }

  // Only the source is i64 from here on.
  IGC_ASSERT(Emu->isInt64(From));
  // Skip argument which is already prepared specially.
  if (Emu->isArg64Cast(&BC))
    return false;

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(From);
  Type *PairTy = Emu->getV2Int32Ty();
  Value *Packed = UndefValue::get(PairTy);
  Packed = IRB->CreateInsertElement(Packed, LowHalf, IRB->getInt32(0));
  Packed = IRB->CreateInsertElement(Packed, HighHalf, IRB->getInt32(1));
  Packed = IRB->CreateBitCast(Packed, BC.getType());
  BC.replaceAllUsesWith(Packed);
  return true;
}
// Expands an integer comparison of i64 operands into 32-bit comparisons and
// replaces all uses of Cmp with the combined i1 result.
//   * eq: both halves equal;   ne: either half differs;
//   * ordered predicates: (Hi equal && LoPred(Lo)) || HiPred(Hi), where the low
//     halves are always compared unsigned and the high halves carry the
//     signedness; the non-strict forms use the strict predicate on Hi.
// Returns false when the first operand is not i64.
bool InstExpander::visitICmp(ICmpInst &Cmp) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(Cmp.getOperand(0)))
    return false;

  Value *LhsLo = nullptr, *LhsHi = nullptr;
  Value *RhsLo = nullptr, *RhsHi = nullptr;
  std::tie(LhsLo, LhsHi) = Emu->getExpandedValues(Cmp.getOperand(0));
  std::tie(RhsLo, RhsHi) = Emu->getExpandedValues(Cmp.getOperand(1));

  // Emits the ordered-compare pattern; the emission order is low compare,
  // high equality, and, high compare, or.
  auto EmitOrdered = [&](CmpInst::Predicate LoPred, CmpInst::Predicate HiPred) -> Value * {
    Value *LoCmp = IRB->CreateICmp(LoPred, LhsLo, RhsLo);
    Value *HiEqual = IRB->CreateICmpEQ(LhsHi, RhsHi);
    Value *TieBreak = IRB->CreateAnd(HiEqual, LoCmp);
    Value *HiCmp = IRB->CreateICmp(HiPred, LhsHi, RhsHi);
    return IRB->CreateOr(TieBreak, HiCmp);
  };

  Value *Result = nullptr;
  switch (Cmp.getPredicate()) {
  case CmpInst::ICMP_EQ: {
    Value *LoEqual = IRB->CreateICmpEQ(LhsLo, RhsLo);
    Value *HiEqual = IRB->CreateICmpEQ(LhsHi, RhsHi);
    Result = IRB->CreateAnd(HiEqual, LoEqual);
    break;
  }
  case CmpInst::ICMP_NE: {
    Value *LoDiffers = IRB->CreateICmpNE(LhsLo, RhsLo);
    Value *HiDiffers = IRB->CreateICmpNE(LhsHi, RhsHi);
    Result = IRB->CreateOr(HiDiffers, LoDiffers);
    break;
  }
  case CmpInst::ICMP_UGT:
    Result = EmitOrdered(CmpInst::ICMP_UGT, CmpInst::ICMP_UGT);
    break;
  case CmpInst::ICMP_UGE:
    Result = EmitOrdered(CmpInst::ICMP_UGE, CmpInst::ICMP_UGT);
    break;
  case CmpInst::ICMP_ULT:
    Result = EmitOrdered(CmpInst::ICMP_ULT, CmpInst::ICMP_ULT);
    break;
  case CmpInst::ICMP_ULE:
    Result = EmitOrdered(CmpInst::ICMP_ULE, CmpInst::ICMP_ULT);
    break;
  case CmpInst::ICMP_SGT:
    Result = EmitOrdered(CmpInst::ICMP_UGT, CmpInst::ICMP_SGT);
    break;
  case CmpInst::ICMP_SGE:
    Result = EmitOrdered(CmpInst::ICMP_UGE, CmpInst::ICMP_SGT);
    break;
  case CmpInst::ICMP_SLT:
    Result = EmitOrdered(CmpInst::ICMP_ULT, CmpInst::ICMP_SLT);
    break;
  case CmpInst::ICMP_SLE:
    Result = EmitOrdered(CmpInst::ICMP_ULE, CmpInst::ICMP_SLT);
    break;
  default:
    IGC_ASSERT_EXIT_MESSAGE(0, "Invalid ICmp predicate");
    break;
  }
  IGC_ASSERT(nullptr != Result);
  Cmp.replaceAllUsesWith(Result);
  return true;
}
// i64 PHI nodes are not rewritten by this visitor: it only queries the
// expanded halves and asserts that both exist. Always returns false.
// NOTE(review): presumably the query is what materializes the Lo/Hi pair for
// the PHI — confirm against getExpandedValues().
bool InstExpander::visitPHI(PHINode &PN) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&PN))
    return false;

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(&PN);
  IGC_ASSERT(nullptr != LowHalf);
  IGC_ASSERT(nullptr != HighHalf);
  return false;
}
// Handles calls that return i64 or take at least one i64 operand.
//   * Calls without any i64 involvement, and the listed annotation/debug/
//     lifetime style intrinsics, are left untouched (returns false).
//   * @llvm.abs on i64 is emulated on the Lo/Hi halves.
//   * Any other call is cloned; each i64 argument of the clone is rebuilt from
//     its expanded halves, an i64 result is split into halves right after the
//     clone, and all uses of the original call are redirected to the clone.
// Returns true when the call was emulated or re-created.
bool InstExpander::visitCall(CallInst &Call) {
// lambdas for splitting and combining i64 to <2 x i32>
// Packs the expanded halves of `val` into <2 x i32> and bitcasts it to i64.
auto Combine2xi32Toi64 = [this](Value *val) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT(Emu->isInt64(val));
auto [InputLo, InputHi] = Emu->getExpandedValues(val);
Type *V2I32Ty = Emu->getV2Int32Ty();
Value *NewVal = UndefValue::get(V2I32Ty);
NewVal = IRB->CreateInsertElement(NewVal, InputLo, IRB->getInt32(0));
NewVal = IRB->CreateInsertElement(NewVal, InputHi, IRB->getInt32(1));
NewVal = IRB->CreateBitCast(NewVal, IRB->getInt64Ty());
return NewVal;
};
// Bitcasts an i64 value to <2 x i32> and returns (element 0, element 1) as
// the (Lo, Hi) pair.
auto Spliti64To2xi32 = [this](Value *retVal) {
IGC_ASSERT(nullptr != Emu);
IGC_ASSERT(Emu->isInt64(retVal));
Value *V = IRB->CreateBitCast(retVal, Emu->getV2Int32Ty());
auto Lo = IRB->CreateExtractElement(V, IRB->getInt32(0));
auto Hi = IRB->CreateExtractElement(V, IRB->getInt32(1));
return std::make_pair(Lo, Hi);
};
IGC_ASSERT(nullptr != Emu);
const Function *F = Call.getCalledFunction();
// The call needs handling when its result or any of its operands is i64.
bool doInt64BitCall = Emu->isInt64(&Call);
if (!doInt64BitCall) {
for (auto &Op : Call.operands()) {
if (Emu->isInt64(Op.get())) {
doInt64BitCall = true;
break;
}
}
}
if (!doInt64BitCall) {
return false;
}
// F is null when getCalledFunction() cannot name a callee; such calls skip
// the intrinsic handling and go straight to the generic path below.
if (F && F->isDeclaration()) {
Intrinsic::ID IntrID = F->getIntrinsicID();
switch (IntrID) {
default:
break;
// Ignore the following intrinsics in CG.
case Intrinsic::assume:
case Intrinsic::expect:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::var_annotation:
case Intrinsic::ptr_annotation:
case Intrinsic::annotation:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
return false;
// emulate @llvm.abs.i64
case Intrinsic::abs: {
Value *OldVal = Call.getArgOperand(0);
auto [Lo, Hi] = Emu->getExpandedValues(OldVal);
// The value is negative when its high half is negative.
Value *Cmp = IRB->CreateICmpSLT(Hi, IRB->getInt32(0));
GenISAIntrinsic::ID GIID = GenISAIntrinsic::GenISA_sub_pair;
Function *IFunc = GenISAIntrinsic::getDeclaration(Emu->getModule(), GIID);
// Compute 0 - x on the pair and pick it when x is negative.
Value *Sub = IRB->CreateCall4(IFunc, IRB->getInt32(0), IRB->getInt32(0), Lo, Hi);
Value *SubLo = IRB->CreateExtractValue(Sub, 0);
Value *SubHi = IRB->CreateExtractValue(Sub, 1);
Value *SelectLo = IRB->CreateSelect(Cmp, SubLo, Lo);
Value *SelectHo = IRB->CreateSelect(Cmp, SubHi, Hi);
Emu->setExpandedValues(&Call, SelectLo, SelectHo);
return true;
}
}
}
// Recreate Call with its operands/result emulated
// TODO: Investigate whether we should replace the call with two
// i32-operating calls for Lo and Hi instead (at least for certain
// intrinsics)
auto *CallCopy = Call.clone();
IGC_ASSERT(nullptr != CallCopy);
CallCopy->insertBefore(&Call);
// Recombined operands are created in front of the clone.
IRB->SetInsertPoint(CallCopy);
for (int argNo = 0, sz = (int)IGCLLVM::getNumArgOperands(&Call); argNo < sz; ++argNo) {
Value *OldVal = Call.getArgOperand(argNo);
if (Emu->isInt64(OldVal)) {
Value *NewVal = nullptr;
// If this operand has been combined (same value is used more than
// once. for example, mul %12 %12), do not recombine them.
for (int i = 0; i < argNo; ++i) {
if (Call.getArgOperand(i) == OldVal) {
NewVal = CallCopy->getOperand(i);
break;
}
}
if (NewVal == nullptr) {
NewVal = Combine2xi32Toi64(OldVal);
}
CallCopy->setOperand(argNo, NewVal);
}
}
// For int64 return value, split it right after CallCopy
if (Emu->isInt64(&Call)) {
// Inserting before the original call places the split after the clone.
IRB->SetInsertPoint(&Call);
auto [OutputLo, OutputHi] = Spliti64To2xi32(CallCopy);
// The halves are registered for the clone, which takes over all uses below.
Emu->setExpandedValues(CallCopy, OutputLo, OutputHi);
}
Call.replaceAllUsesWith(CallCopy);
return true;
}
// Expands an i64 `select` into two 32-bit selects sharing the original
// condition, one per half. Returns false when the result is not i64.
bool InstExpander::visitSelect(SelectInst &SI) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&SI))
    return false;

  Value *Picker = SI.getOperand(0);
  Value *TrueLo = nullptr, *TrueHi = nullptr;
  Value *FalseLo = nullptr, *FalseHi = nullptr;
  std::tie(TrueLo, TrueHi) = Emu->getExpandedValues(SI.getOperand(1));
  std::tie(FalseLo, FalseHi) = Emu->getExpandedValues(SI.getOperand(2));
  IGC_ASSERT(nullptr != IRB);

  // Low half first, then high half.
  Value *ResLo = IRB->CreateSelect(Picker, TrueLo, FalseLo);
  Value *ResHi = IRB->CreateSelect(Picker, TrueHi, FalseHi);
  Emu->setExpandedValues(&SI, ResLo, ResHi);
  return true;
}
// 64-bit `va_arg` is not emulated: an i64 result only trips the assertion
// below. Always returns false (nothing was expanded).
bool InstExpander::visitVAArg(VAArgInst &VAAI) {
  // TODO: Add i64 emulation support.
  IGC_ASSERT(nullptr != Emu);
  IGC_ASSERT_MESSAGE(!Emu->isInt64(&VAAI), "TODO: NOT IMPLEMENTED YET!");
  return false;
}
// Handles `extractelement`.
//   1. An i64 index operand is replaced by its low 32-bit half.
//   2. When the extracted element is i64, the vector is reinterpreted as a
//      vector of i32 pairs and the halves are read from elements 2*Idx and
//      2*Idx + 1.
// Returns false when the extracted element is not i64 (the index may still
// have been rewritten).
bool InstExpander::visitExtractElement(ExtractElementInst &EEI) {
  IGC_ASSERT(nullptr != Emu);

  // Fix index operand if necessary.
  if (Value *RawIdx = EEI.getIndexOperand(); Emu->isInt64(RawIdx)) {
    Value *IdxLow = std::get<0>(Emu->getExpandedValues(RawIdx));
    EEI.setOperand(1, IdxLow);
  }

  // Skip if it's not 64-bit integer.
  if (!Emu->isInt64(&EEI))
    return false;

  // NOTE: This is NOT the efficient way to handle that and needs revising
  // later.
  Value *Vec = EEI.getVectorOperand();
  const unsigned ElemCount = (unsigned)cast<IGCLLVM::FixedVectorType>(Vec->getType())->getNumElements();
  Value *AsPairs = IRB->CreateBitCast(Vec, Emu->getV2Int32Ty(ElemCount));

  // Re-calculate indices to Lo and Hi parts. The index operand is re-read
  // because it may have been replaced above.
  Value *Base = IRB->CreateZExt(EEI.getIndexOperand(), IRB->getInt32Ty());
  Base = IRB->CreateMul(Base, IRB->getInt32(2));
  Value *LoIndex = IRB->CreateAdd(Base, IRB->getInt32(0));
  Value *HiIndex = IRB->CreateAdd(Base, IRB->getInt32(1));

  Value *LowHalf = IRB->CreateExtractElement(AsPairs, LoIndex);
  Value *HighHalf = IRB->CreateExtractElement(AsPairs, HiIndex);
  Emu->setExpandedValues(&EEI, LowHalf, HighHalf);
  return true;
}
// Handles `insertelement`.
//   1. An i64 index operand is replaced by its low 32-bit half.
//   2. When the inserted scalar is i64, the vector is reinterpreted as a vector
//      of i32 pairs, the halves are written to elements 2*Idx and 2*Idx + 1,
//      the result is cast back to the original vector type and replaces all
//      uses of IEI.
// Returns false when the inserted scalar is not i64 (the index may still have
// been rewritten).
bool InstExpander::visitInsertElement(InsertElementInst &IEI) {
  // Fix index operand if necessary.
  IGC_ASSERT(nullptr != Emu);
  if (Emu->isInt64(IEI.getOperand(2))) {
    Value *L = nullptr;
    std::tie(L, std::ignore) = Emu->getExpandedValues(IEI.getOperand(2));
    IEI.setOperand(2, L);
  }
  // Skip if it's not 64-bit integer.
  Value *V = IEI.getOperand(1);
  if (!Emu->isInt64(V))
    return false;
  // NOTE: This is NOT the efficient way to handle that and needs revising
  // later.
  auto [Lo, Hi] = Emu->getExpandedValues(V);
  // Create the emulated vector.
  Value *NewVal = IEI.getOperand(0);
  unsigned NumElts = (unsigned)cast<IGCLLVM::FixedVectorType>(NewVal->getType())->getNumElements();
  NewVal = IRB->CreateBitCast(NewVal, Emu->getV2Int32Ty(NumElts));
  // Re-calculate indices to Lo and Hi parts.
  Value *Idx = IEI.getOperand(2);
  // Widen an index narrower than 32 bits before the i32 arithmetic below, as
  // visitExtractElement does; a 32-bit index is returned unchanged.
  Idx = IRB->CreateZExt(Idx, IRB->getInt32Ty());
  Idx = IRB->CreateMul(Idx, IRB->getInt32(2));
  Value *IdxLo = IRB->CreateAdd(Idx, IRB->getInt32(0));
  Value *IdxHi = IRB->CreateAdd(Idx, IRB->getInt32(1));
  // Insert Lo and Hi parts into the emulated vector.
  NewVal = IRB->CreateInsertElement(NewVal, Lo, IdxLo);
  NewVal = IRB->CreateInsertElement(NewVal, Hi, IdxHi);
  NewVal = IRB->CreateBitCast(NewVal, IEI.getType());
  IEI.replaceAllUsesWith(NewVal);
  return true;
}
// Expands an `extractvalue` producing i64: the instruction is cloned, the
// clone's result is split into halves (via a <2 x i32> bitcast) right before
// the original, the halves are registered for the clone, and all uses of the
// original are redirected to the clone. Returns false when the result is not
// i64.
bool InstExpander::visitExtractValue(ExtractValueInst &EVI) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(&EVI))
    return false;
  IGC_ASSERT(EVI.getNumIndices() == 1);

  Instruction *Twin = EVI.clone();
  Twin->insertBefore(&EVI);

  // New instructions land between the clone and the original.
  IRB->SetInsertPoint(&EVI);
  Value *AsPair = IRB->CreateBitCast(Twin, Emu->getV2Int32Ty());
  Value *LowHalf = IRB->CreateExtractElement(AsPair, IRB->getInt32(0));
  Value *HighHalf = IRB->CreateExtractElement(AsPair, IRB->getInt32(1));

  Emu->setExpandedValues(Twin, LowHalf, HighHalf);
  EVI.replaceAllUsesWith(Twin);
  return true;
}
// Expands an `insertvalue` whose inserted operand is i64: the instruction is
// cloned, the i64 operand of the clone is rebuilt from its expanded halves
// (packed into <2 x i32> and bitcast to i64) in front of the clone, and all
// uses of the original are redirected to the clone. Returns false when the
// inserted operand is not i64.
bool InstExpander::visitInsertValue(InsertValueInst &IVI) {
  IGC_ASSERT(nullptr != Emu);
  if (!Emu->isInt64(IVI.getOperand(1)))
    return false;
  IGC_ASSERT(IVI.getNumIndices() == 1);

  Instruction *Twin = IVI.clone();
  Twin->insertBefore(&IVI);
  // The rebuilt operand must be defined before the clone that uses it.
  IRB->SetInsertPoint(Twin);

  Value *LowHalf = nullptr, *HighHalf = nullptr;
  std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(IVI.getOperand(1));

  Type *PairTy = Emu->getV2Int32Ty();
  Value *Packed = UndefValue::get(PairTy);
  Packed = IRB->CreateInsertElement(Packed, LowHalf, IRB->getInt32(0));
  Packed = IRB->CreateInsertElement(Packed, HighHalf, IRB->getInt32(1));
  Packed = IRB->CreateBitCast(Packed, IRB->getInt64Ty());

  Twin->setOperand(1, Packed);
  IVI.replaceAllUsesWith(Twin);
  return true;
}
// 64-bit landing pads are not emulated: an i64 operand 1 only trips the
// assertion below. Always returns false (nothing was expanded).
bool InstExpander::visitLandingPad(LandingPadInst &LPI) {
  // TODO: Add i64 emulation support.
  IGC_ASSERT(nullptr != Emu);
  IGC_ASSERT(1 < LPI.getNumOperands());
  IGC_ASSERT_MESSAGE(!Emu->isInt64(LPI.getOperand(1)), "TODO: NOT IMPLEMENTED YET!");
  return false;
}
// Keeps BinOp as a native 64-bit instruction when the platform reports partial
// int64 support.
//
// Unless BinOp was found in (and erased from) the pass's int64 instruction
// list, both operands are rebuilt as i64 values from their expanded halves.
// The result of BinOp is then split into Lo/Hi right after it, and the pass is
// told not to delete the original instruction.
//
// Returns true when BinOp is kept native, false when the platform has no
// partial int64 support.
bool InstExpander::hasHWSupport(BinaryOperator &BinOp) {
  if (!Emu->CGC->platform.hasPartialInt64Support())
    return false;

  // When the erase succeeds there is no need to reconstruct the 64b operands:
  // the former instruction SExt or ZExt has not been changed.
  const bool OperandsAreNative = Emu->eraseInstrFromInt64InstrList(&BinOp);
  if (!OperandsAreNative) {
    // reconstruct the 64b operands
    for (unsigned OpIdx = 0; OpIdx < 2; OpIdx++) {
      Value *LowHalf = nullptr, *HighHalf = nullptr;
      std::tie(LowHalf, HighHalf) = Emu->getExpandedValues(BinOp.getOperand(OpIdx));
      Type *PairTy = Emu->getV2Int32Ty();
      Value *Rebuilt = UndefValue::get(PairTy);
      Rebuilt = IRB->CreateInsertElement(Rebuilt, LowHalf, IRB->getInt32(0));
      Rebuilt = IRB->CreateInsertElement(Rebuilt, HighHalf, IRB->getInt32(1));
      Rebuilt = IRB->CreateBitCast(Rebuilt, IRB->getInt64Ty());
      BinOp.setOperand(OpIdx, Rebuilt);
    }
  }

  // Leave, do not delete an original instruction
  BasicBlock::iterator AfterBinOp = std::next(BasicBlock::iterator(&BinOp));
  IRB->SetInsertPoint(&*AfterBinOp);

  // Split the result into Lo and Hi part.
  Value *AsPair = IRB->CreateBitCast(&BinOp, Emu->getV2Int32Ty());
  Value *ResLo = IRB->CreateExtractElement(AsPair, IRB->getInt32(0));
  Value *ResHi = IRB->CreateExtractElement(AsPair, IRB->getInt32(1));
  Emu->setExpandedValues(&BinOp, ResLo, ResHi);
  Emu->doNotDeleteInstr = true;
  return true;
}
// Decides whether the result of `inst` (called for SExt/ZExt in this file)
// must be split into Lo and Hi parts.
//
// On platforms with partial int64 support the split is skipped when
//   (A) every instruction user has an opcode with native int64 support, e.g.
//         %9 = shl nsw i64 %conv.i1, 1 ; %5 = add i64 %4, %conv.i1
//   (B) every operand of those users is a ConstantInt, SExt or ZExt, e.g.
//         %1 = shl nsw i64 %conv.i1, 3
//   and at least one user is a BinaryOperator; each such user is recorded in
//   the pass's int64 instruction list.
// Returns true when splitting is required.
bool InstExpander::needsLoHiSplitting(Instruction &inst) {
  if (!Emu->CGC->platform.hasPartialInt64Support())
    return true;

  bool usersStayNative = true;
  for (User *Consumer : inst.users()) {
    Instruction *ConsumerInst = dyn_cast<Instruction>(Consumer);
    if (!ConsumerInst)
      continue;
    // (A): a single unsupported opcode ends the scan.
    if (!Emu->hasOpcodeInt64Support(ConsumerInst->getOpcode())) {
      usersStayNative = false;
      break;
    }
    // (B): a disqualifying operand only ends the scan of this user's operands.
    for (const Use &Operand : ConsumerInst->operands()) {
      Value *OperandVal = Operand.get();
      const bool Acceptable =
          isa<ConstantInt>(OperandVal) || isa<SExtInst>(OperandVal) || isa<ZExtInst>(OperandVal);
      if (!Acceptable) {
        usersStayNative = false;
        break;
      }
    }
  }
  if (!usersStayNative)
    return true;

  bool splittingRequired = true;
  for (User *Consumer : inst.users()) {
    if (Instruction *BinaryUser = dyn_cast<BinaryOperator>(Consumer)) {
      Emu->addInstToInt64InstrList(BinaryUser);
      splittingRequired = false;
    }
  }
  return splittingRequired;
}