mirror of
https://github.com/intel/llvm.git
synced 2026-02-04 03:26:06 +08:00
Also take the chance and rename access functions to access relations. This is because we do not only allow plain functions to describe an access, but we can have any access relation that can be described with linear constraints. llvm-svn: 141257
1603 lines
56 KiB
C++
1603 lines
56 KiB
C++
//===------ CodeGeneration.cpp - Code generate the Scops. -----------------===//
|
|
//
|
|
// The LLVM Compiler Infrastructure
|
|
//
|
|
// This file is distributed under the University of Illinois Open Source
|
|
// License. See LICENSE.TXT for details.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// The CodeGeneration pass takes a Scop created by ScopInfo and translates it
|
|
// back to LLVM-IR using Cloog.
|
|
//
|
|
// The Scop describes the high level memory behaviour of a control flow region.
|
|
// Transformation passes can update the schedule (execution order) of statements
|
|
// in the Scop. Cloog is used to generate an abstract syntax tree (clast) that
|
|
// reflects the updated execution order. This clast is used to create new
|
|
// LLVM-IR that is computationally equivalent to the original control flow region,
|
|
// but executes its code in the new execution order defined by the changed
|
|
// scattering.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#define DEBUG_TYPE "polly-codegen"
|
|
|
|
#include "polly/LinkAllPasses.h"
|
|
#include "polly/Support/GICHelper.h"
|
|
#include "polly/Support/ScopHelper.h"
|
|
#include "polly/Cloog.h"
|
|
#include "polly/Dependences.h"
|
|
#include "polly/ScopInfo.h"
|
|
#include "polly/TempScopInfo.h"
|
|
#include "llvm/Support/CommandLine.h"
|
|
#include "llvm/Support/Debug.h"
|
|
#include "llvm/Support/IRBuilder.h"
|
|
#include "llvm/Analysis/LoopInfo.h"
|
|
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
#include "llvm/Target/TargetData.h"
|
|
#include "llvm/Module.h"
|
|
#include "llvm/ADT/SetVector.h"
|
|
|
|
#define CLOOG_INT_GMP 1
|
|
#include "cloog/cloog.h"
|
|
#include "cloog/isl/cloog.h"
|
|
|
|
#include <vector>
|
|
#include <utility>
|
|
|
|
using namespace polly;
|
|
using namespace llvm;
|
|
|
|
struct isl_set;
|
|
|
|
namespace polly {
|
|
|
|
static cl::opt<bool>
|
|
Vector("enable-polly-vector",
|
|
cl::desc("Enable polly vector code generation"), cl::Hidden,
|
|
cl::value_desc("Vector code generation enabled if true"),
|
|
cl::init(false));
|
|
|
|
static cl::opt<bool>
|
|
OpenMP("enable-polly-openmp",
|
|
cl::desc("Generate OpenMP parallel code"), cl::Hidden,
|
|
cl::value_desc("OpenMP code generation enabled if true"),
|
|
cl::init(false));
|
|
|
|
static cl::opt<bool>
|
|
AtLeastOnce("enable-polly-atLeastOnce",
|
|
cl::desc("Give polly the hint, that every loop is executed at least"
|
|
"once"), cl::Hidden,
|
|
cl::value_desc("OpenMP code generation enabled if true"),
|
|
cl::init(false));
|
|
|
|
static cl::opt<bool>
|
|
Aligned("enable-polly-aligned",
|
|
cl::desc("Assumed aligned memory accesses."), cl::Hidden,
|
|
cl::value_desc("OpenMP code generation enabled if true"),
|
|
cl::init(false));
|
|
|
|
typedef DenseMap<const Value*, Value*> ValueMapT;
|
|
typedef DenseMap<const char*, Value*> CharMapT;
|
|
typedef std::vector<ValueMapT> VectorValueMapT;
|
|
|
|
// Create a new loop.
|
|
//
|
|
// @param Builder The builder used to create the loop. It also defines the
|
|
// place where to create the loop.
|
|
// @param UB The upper bound of the loop iv.
|
|
// @param Stride The number by which the loop iv is incremented after every
|
|
// iteration.
|
|
static void createLoop(IRBuilder<> *Builder, Value *LB, Value *UB, APInt Stride,
|
|
PHINode*& IV, BasicBlock*& AfterBB, Value*& IncrementedIV,
|
|
DominatorTree *DT) {
|
|
Function *F = Builder->GetInsertBlock()->getParent();
|
|
LLVMContext &Context = F->getContext();
|
|
|
|
BasicBlock *PreheaderBB = Builder->GetInsertBlock();
|
|
BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
|
|
BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F);
|
|
AfterBB = BasicBlock::Create(Context, "polly.after_loop", F);
|
|
|
|
Builder->CreateBr(HeaderBB);
|
|
DT->addNewBlock(HeaderBB, PreheaderBB);
|
|
|
|
Builder->SetInsertPoint(BodyBB);
|
|
|
|
Builder->SetInsertPoint(HeaderBB);
|
|
|
|
// Use the type of upper and lower bound.
|
|
assert(LB->getType() == UB->getType()
|
|
&& "Different types for upper and lower bound.");
|
|
|
|
IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
|
|
assert(LoopIVType && "UB is not integer?");
|
|
|
|
// IV
|
|
IV = Builder->CreatePHI(LoopIVType, 2, "polly.loopiv");
|
|
IV->addIncoming(LB, PreheaderBB);
|
|
|
|
// IV increment.
|
|
Value *StrideValue = ConstantInt::get(LoopIVType,
|
|
Stride.zext(LoopIVType->getBitWidth()));
|
|
IncrementedIV = Builder->CreateAdd(IV, StrideValue, "polly.next_loopiv");
|
|
|
|
// Exit condition.
|
|
if (AtLeastOnce) { // At least on iteration.
|
|
UB = Builder->CreateAdd(UB, Builder->getInt64(1));
|
|
Value *CMP = Builder->CreateICmpEQ(IV, UB);
|
|
Builder->CreateCondBr(CMP, AfterBB, BodyBB);
|
|
} else { // Maybe not executed at all.
|
|
Value *CMP = Builder->CreateICmpSLE(IV, UB);
|
|
Builder->CreateCondBr(CMP, BodyBB, AfterBB);
|
|
}
|
|
DT->addNewBlock(BodyBB, HeaderBB);
|
|
DT->addNewBlock(AfterBB, HeaderBB);
|
|
|
|
Builder->SetInsertPoint(BodyBB);
|
|
}
|
|
|
|
class BlockGenerator {
|
|
IRBuilder<> &Builder;
|
|
ValueMapT &VMap;
|
|
VectorValueMapT &ValueMaps;
|
|
Scop &S;
|
|
ScopStmt &statement;
|
|
isl_set *scatteringDomain;
|
|
|
|
public:
|
|
BlockGenerator(IRBuilder<> &B, ValueMapT &vmap, VectorValueMapT &vmaps,
|
|
ScopStmt &Stmt, isl_set *domain)
|
|
: Builder(B), VMap(vmap), ValueMaps(vmaps), S(*Stmt.getParent()),
|
|
statement(Stmt), scatteringDomain(domain) {}
|
|
|
|
const Region &getRegion() {
|
|
return S.getRegion();
|
|
}
|
|
|
|
Value* makeVectorOperand(Value *operand, int vectorWidth) {
|
|
if (operand->getType()->isVectorTy())
|
|
return operand;
|
|
|
|
VectorType *vectorType = VectorType::get(operand->getType(), vectorWidth);
|
|
Value *vector = UndefValue::get(vectorType);
|
|
vector = Builder.CreateInsertElement(vector, operand, Builder.getInt32(0));
|
|
|
|
std::vector<Constant*> splat;
|
|
|
|
for (int i = 0; i < vectorWidth; i++)
|
|
splat.push_back (Builder.getInt32(0));
|
|
|
|
Constant *splatVector = ConstantVector::get(splat);
|
|
|
|
return Builder.CreateShuffleVector(vector, vector, splatVector);
|
|
}
|
|
|
|
Value* getOperand(const Value *oldOperand, ValueMapT &BBMap,
|
|
ValueMapT *VectorMap = 0) {
|
|
const Instruction *OpInst = dyn_cast<Instruction>(oldOperand);
|
|
|
|
if (!OpInst)
|
|
return const_cast<Value*>(oldOperand);
|
|
|
|
if (VectorMap && VectorMap->count(oldOperand))
|
|
return (*VectorMap)[oldOperand];
|
|
|
|
// IVS and Parameters.
|
|
if (VMap.count(oldOperand)) {
|
|
Value *NewOperand = VMap[oldOperand];
|
|
|
|
// Insert a cast if types are different
|
|
if (oldOperand->getType()->getScalarSizeInBits()
|
|
< NewOperand->getType()->getScalarSizeInBits())
|
|
NewOperand = Builder.CreateTruncOrBitCast(NewOperand,
|
|
oldOperand->getType());
|
|
|
|
return NewOperand;
|
|
}
|
|
|
|
// Instructions calculated in the current BB.
|
|
if (BBMap.count(oldOperand)) {
|
|
return BBMap[oldOperand];
|
|
}
|
|
|
|
// Ignore instructions that are referencing ops in the old BB. These
|
|
// instructions are unused. They where replace by new ones during
|
|
// createIndependentBlocks().
|
|
if (getRegion().contains(OpInst->getParent()))
|
|
return NULL;
|
|
|
|
return const_cast<Value*>(oldOperand);
|
|
}
|
|
|
|
Type *getVectorPtrTy(const Value *V, int vectorWidth) {
|
|
PointerType *pointerType = dyn_cast<PointerType>(V->getType());
|
|
assert(pointerType && "PointerType expected");
|
|
|
|
Type *scalarType = pointerType->getElementType();
|
|
VectorType *vectorType = VectorType::get(scalarType, vectorWidth);
|
|
|
|
return PointerType::getUnqual(vectorType);
|
|
}
|
|
|
|
/// @brief Load a vector from a set of adjacent scalars
|
|
///
|
|
/// In case a set of scalars is known to be next to each other in memory,
|
|
/// create a vector load that loads those scalars
|
|
///
|
|
/// %vector_ptr= bitcast double* %p to <4 x double>*
|
|
/// %vec_full = load <4 x double>* %vector_ptr
|
|
///
|
|
Value *generateStrideOneLoad(const LoadInst *load, ValueMapT &BBMap,
|
|
int size) {
|
|
const Value *pointer = load->getPointerOperand();
|
|
Type *vectorPtrType = getVectorPtrTy(pointer, size);
|
|
Value *newPointer = getOperand(pointer, BBMap);
|
|
Value *VectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType,
|
|
"vector_ptr");
|
|
LoadInst *VecLoad = Builder.CreateLoad(VectorPtr,
|
|
load->getNameStr()
|
|
+ "_p_vec_full");
|
|
if (!Aligned)
|
|
VecLoad->setAlignment(8);
|
|
|
|
return VecLoad;
|
|
}
|
|
|
|
/// @brief Load a vector initialized from a single scalar in memory
|
|
///
|
|
/// In case all elements of a vector are initialized to the same
|
|
/// scalar value, this value is loaded and shuffeled into all elements
|
|
/// of the vector.
|
|
///
|
|
/// %splat_one = load <1 x double>* %p
|
|
/// %splat = shufflevector <1 x double> %splat_one, <1 x
|
|
/// double> %splat_one, <4 x i32> zeroinitializer
|
|
///
|
|
Value *generateStrideZeroLoad(const LoadInst *load, ValueMapT &BBMap,
|
|
int size) {
|
|
const Value *pointer = load->getPointerOperand();
|
|
Type *vectorPtrType = getVectorPtrTy(pointer, 1);
|
|
Value *newPointer = getOperand(pointer, BBMap);
|
|
Value *vectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType,
|
|
load->getNameStr() + "_p_vec_p");
|
|
LoadInst *scalarLoad= Builder.CreateLoad(vectorPtr,
|
|
load->getNameStr() + "_p_splat_one");
|
|
|
|
if (!Aligned)
|
|
scalarLoad->setAlignment(8);
|
|
|
|
std::vector<Constant*> splat;
|
|
|
|
for (int i = 0; i < size; i++)
|
|
splat.push_back (Builder.getInt32(0));
|
|
|
|
Constant *splatVector = ConstantVector::get(splat);
|
|
|
|
Value *vectorLoad = Builder.CreateShuffleVector(scalarLoad, scalarLoad,
|
|
splatVector,
|
|
load->getNameStr()
|
|
+ "_p_splat");
|
|
return vectorLoad;
|
|
}
|
|
|
|
/// @Load a vector from scalars distributed in memory
|
|
///
|
|
/// In case some scalars a distributed randomly in memory. Create a vector
|
|
/// by loading each scalar and by inserting one after the other into the
|
|
/// vector.
|
|
///
|
|
/// %scalar_1= load double* %p_1
|
|
/// %vec_1 = insertelement <2 x double> undef, double %scalar_1, i32 0
|
|
/// %scalar 2 = load double* %p_2
|
|
/// %vec_2 = insertelement <2 x double> %vec_1, double %scalar_1, i32 1
|
|
///
|
|
Value *generateUnknownStrideLoad(const LoadInst *load,
|
|
VectorValueMapT &scalarMaps,
|
|
int size) {
|
|
const Value *pointer = load->getPointerOperand();
|
|
VectorType *vectorType = VectorType::get(
|
|
dyn_cast<PointerType>(pointer->getType())->getElementType(), size);
|
|
|
|
Value *vector = UndefValue::get(vectorType);
|
|
|
|
for (int i = 0; i < size; i++) {
|
|
Value *newPointer = getOperand(pointer, scalarMaps[i]);
|
|
Value *scalarLoad = Builder.CreateLoad(newPointer,
|
|
load->getNameStr() + "_p_scalar_");
|
|
vector = Builder.CreateInsertElement(vector, scalarLoad,
|
|
Builder.getInt32(i),
|
|
load->getNameStr() + "_p_vec_");
|
|
}
|
|
|
|
return vector;
|
|
}
|
|
|
|
/// @brief Get the memory access offset to be added to the base address
|
|
std::vector <Value*> getMemoryAccessIndex(isl_map *accessRelation,
|
|
Value *baseAddr) {
|
|
isl_int offsetMPZ;
|
|
isl_int_init(offsetMPZ);
|
|
|
|
assert((isl_map_dim(accessRelation, isl_dim_out) == 1)
|
|
&& "Only single dimensional access functions supported");
|
|
|
|
if (isl_map_plain_is_fixed(accessRelation, isl_dim_out,
|
|
0, &offsetMPZ) == -1)
|
|
errs() << "Only fixed value access functions supported\n";
|
|
|
|
// Convert the offset from MPZ to Value*.
|
|
APInt offset = APInt_from_MPZ(offsetMPZ);
|
|
Value *offsetValue = ConstantInt::get(Builder.getContext(), offset);
|
|
PointerType *baseAddrType = dyn_cast<PointerType>(baseAddr->getType());
|
|
Type *arrayType = baseAddrType->getElementType();
|
|
Type *arrayElementType = dyn_cast<ArrayType>(arrayType)->getElementType();
|
|
offsetValue = Builder.CreateSExtOrBitCast(offsetValue, arrayElementType);
|
|
|
|
std::vector<Value*> indexArray;
|
|
Value *nullValue = Constant::getNullValue(arrayElementType);
|
|
indexArray.push_back(nullValue);
|
|
indexArray.push_back(offsetValue);
|
|
|
|
isl_int_clear(offsetMPZ);
|
|
return indexArray;
|
|
}
|
|
|
|
/// @brief Get the new operand address according to the changed access in
|
|
/// JSCOP file.
|
|
Value *getNewAccessOperand(isl_map *newAccessRelation, Value *baseAddr,
|
|
const Value *oldOperand, ValueMapT &BBMap) {
|
|
std::vector<Value*> indexArray = getMemoryAccessIndex(newAccessRelation,
|
|
baseAddr);
|
|
Value *newOperand = Builder.CreateGEP(baseAddr, indexArray,
|
|
"p_newarrayidx_");
|
|
return newOperand;
|
|
}
|
|
|
|
/// @brief Generate the operand address
|
|
Value *generateLocationAccessed(const Instruction *Inst,
|
|
const Value *pointer, ValueMapT &BBMap ) {
|
|
MemoryAccess &Access = statement.getAccessFor(Inst);
|
|
isl_map *CurrentAccessRelation = Access.getAccessRelation();
|
|
isl_map *NewAccessRelation = Access.getNewAccessRelation();
|
|
|
|
assert(isl_map_has_equal_space(CurrentAccessRelation, NewAccessRelation)
|
|
&& "Current and new access function use different spaces");
|
|
|
|
Value *NewPointer;
|
|
|
|
if (!NewAccessRelation) {
|
|
NewPointer = getOperand(pointer, BBMap);
|
|
} else {
|
|
Value *BaseAddr = const_cast<Value*>(Access.getBaseAddr());
|
|
NewPointer = getNewAccessOperand(NewAccessRelation, BaseAddr, pointer,
|
|
BBMap);
|
|
}
|
|
|
|
isl_map_free(CurrentAccessRelation);
|
|
isl_map_free(NewAccessRelation);
|
|
return NewPointer;
|
|
}
|
|
|
|
Value *generateScalarLoad(const LoadInst *load, ValueMapT &BBMap) {
|
|
const Value *pointer = load->getPointerOperand();
|
|
const Instruction *Inst = dyn_cast<Instruction>(load);
|
|
Value *newPointer = generateLocationAccessed(Inst, pointer, BBMap);
|
|
Value *scalarLoad = Builder.CreateLoad(newPointer,
|
|
load->getNameStr() + "_p_scalar_");
|
|
return scalarLoad;
|
|
}
|
|
|
|
/// @brief Load a value (or several values as a vector) from memory.
|
|
void generateLoad(const LoadInst *load, ValueMapT &vectorMap,
|
|
VectorValueMapT &scalarMaps, int vectorWidth) {
|
|
if (scalarMaps.size() == 1) {
|
|
scalarMaps[0][load] = generateScalarLoad(load, scalarMaps[0]);
|
|
return;
|
|
}
|
|
|
|
Value *newLoad;
|
|
|
|
MemoryAccess &Access = statement.getAccessFor(load);
|
|
|
|
assert(scatteringDomain && "No scattering domain available");
|
|
|
|
if (Access.isStrideZero(scatteringDomain))
|
|
newLoad = generateStrideZeroLoad(load, scalarMaps[0], vectorWidth);
|
|
else if (Access.isStrideOne(scatteringDomain))
|
|
newLoad = generateStrideOneLoad(load, scalarMaps[0], vectorWidth);
|
|
else
|
|
newLoad = generateUnknownStrideLoad(load, scalarMaps, vectorWidth);
|
|
|
|
vectorMap[load] = newLoad;
|
|
}
|
|
|
|
void copyUnaryInst(const UnaryInstruction *Inst, ValueMapT &BBMap,
|
|
ValueMapT &VectorMap, int VectorDimension,
|
|
int VectorWidth) {
|
|
Value *NewOperand = getOperand(Inst->getOperand(0), BBMap, &VectorMap);
|
|
NewOperand = makeVectorOperand(NewOperand, VectorWidth);
|
|
|
|
if (const CastInst *Cast = dyn_cast<CastInst>(Inst)) {
|
|
VectorType *DestType = VectorType::get(Inst->getType(), VectorWidth);
|
|
VectorMap[Inst] = Builder.CreateCast(Cast->getOpcode(), NewOperand,
|
|
DestType);
|
|
} else
|
|
llvm_unreachable("Can not generate vector code for instruction");
|
|
return;
|
|
}
|
|
|
|
void copyBinInst(const BinaryOperator *Inst, ValueMapT &BBMap,
|
|
ValueMapT &vectorMap, int vectorDimension, int vectorWidth) {
|
|
Value *opZero = Inst->getOperand(0);
|
|
Value *opOne = Inst->getOperand(1);
|
|
|
|
Value *newOpZero, *newOpOne;
|
|
newOpZero = getOperand(opZero, BBMap, &vectorMap);
|
|
newOpOne = getOperand(opOne, BBMap, &vectorMap);
|
|
|
|
newOpZero = makeVectorOperand(newOpZero, vectorWidth);
|
|
newOpOne = makeVectorOperand(newOpOne, vectorWidth);
|
|
|
|
Value *newInst = Builder.CreateBinOp(Inst->getOpcode(), newOpZero,
|
|
newOpOne,
|
|
Inst->getNameStr() + "p_vec");
|
|
vectorMap[Inst] = newInst;
|
|
|
|
return;
|
|
}
|
|
|
|
void copyVectorStore(const StoreInst *store, ValueMapT &BBMap,
|
|
ValueMapT &vectorMap, VectorValueMapT &scalarMaps,
|
|
int vectorDimension, int vectorWidth) {
|
|
// In vector mode we only generate a store for the first dimension.
|
|
if (vectorDimension > 0)
|
|
return;
|
|
|
|
MemoryAccess &Access = statement.getAccessFor(store);
|
|
|
|
assert(scatteringDomain && "No scattering domain available");
|
|
|
|
const Value *pointer = store->getPointerOperand();
|
|
Value *vector = getOperand(store->getValueOperand(), BBMap, &vectorMap);
|
|
|
|
if (Access.isStrideOne(scatteringDomain)) {
|
|
Type *vectorPtrType = getVectorPtrTy(pointer, vectorWidth);
|
|
Value *newPointer = getOperand(pointer, BBMap, &vectorMap);
|
|
|
|
Value *VectorPtr = Builder.CreateBitCast(newPointer, vectorPtrType,
|
|
"vector_ptr");
|
|
StoreInst *Store = Builder.CreateStore(vector, VectorPtr);
|
|
|
|
if (!Aligned)
|
|
Store->setAlignment(8);
|
|
} else {
|
|
for (unsigned i = 0; i < scalarMaps.size(); i++) {
|
|
Value *scalar = Builder.CreateExtractElement(vector,
|
|
Builder.getInt32(i));
|
|
Value *newPointer = getOperand(pointer, scalarMaps[i]);
|
|
Builder.CreateStore(scalar, newPointer);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
void copyInstScalar(const Instruction *Inst, ValueMapT &BBMap) {
|
|
Instruction *NewInst = Inst->clone();
|
|
|
|
// Replace old operands with the new ones.
|
|
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
|
OE = Inst->op_end(); OI != OE; ++OI) {
|
|
Value *OldOperand = *OI;
|
|
Value *NewOperand = getOperand(OldOperand, BBMap);
|
|
|
|
if (!NewOperand) {
|
|
assert(!isa<StoreInst>(NewInst)
|
|
&& "Store instructions are always needed!");
|
|
delete NewInst;
|
|
return;
|
|
}
|
|
|
|
NewInst->replaceUsesOfWith(OldOperand, NewOperand);
|
|
}
|
|
|
|
Builder.Insert(NewInst);
|
|
BBMap[Inst] = NewInst;
|
|
|
|
if (!NewInst->getType()->isVoidTy())
|
|
NewInst->setName("p_" + Inst->getName());
|
|
}
|
|
|
|
bool hasVectorOperands(const Instruction *Inst, ValueMapT &VectorMap) {
|
|
for (Instruction::const_op_iterator OI = Inst->op_begin(),
|
|
OE = Inst->op_end(); OI != OE; ++OI)
|
|
if (VectorMap.count(*OI))
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
int getVectorSize() {
|
|
return ValueMaps.size();
|
|
}
|
|
|
|
bool isVectorBlock() {
|
|
return getVectorSize() > 1;
|
|
}
|
|
|
|
void copyInstruction(const Instruction *Inst, ValueMapT &BBMap,
|
|
ValueMapT &vectorMap, VectorValueMapT &scalarMaps,
|
|
int vectorDimension, int vectorWidth) {
|
|
// Terminator instructions control the control flow. They are explicitally
|
|
// expressed in the clast and do not need to be copied.
|
|
if (Inst->isTerminator())
|
|
return;
|
|
|
|
if (isVectorBlock()) {
|
|
// If this instruction is already in the vectorMap, a vector instruction
|
|
// was already issued, that calculates the values of all dimensions. No
|
|
// need to create any more instructions.
|
|
if (vectorMap.count(Inst))
|
|
return;
|
|
}
|
|
|
|
if (const LoadInst *load = dyn_cast<LoadInst>(Inst)) {
|
|
generateLoad(load, vectorMap, scalarMaps, vectorWidth);
|
|
return;
|
|
}
|
|
|
|
if (isVectorBlock() && hasVectorOperands(Inst, vectorMap)) {
|
|
if (const UnaryInstruction *UnaryInst = dyn_cast<UnaryInstruction>(Inst))
|
|
copyUnaryInst(UnaryInst, BBMap, vectorMap, vectorDimension,
|
|
vectorWidth);
|
|
else if
|
|
(const BinaryOperator *binaryInst = dyn_cast<BinaryOperator>(Inst))
|
|
copyBinInst(binaryInst, BBMap, vectorMap, vectorDimension, vectorWidth);
|
|
else if (const StoreInst *store = dyn_cast<StoreInst>(Inst))
|
|
copyVectorStore(store, BBMap, vectorMap, scalarMaps, vectorDimension,
|
|
vectorWidth);
|
|
else
|
|
llvm_unreachable("Cannot issue vector code for this instruction");
|
|
|
|
return;
|
|
}
|
|
|
|
copyInstScalar(Inst, BBMap);
|
|
}
|
|
// Insert a copy of a basic block in the newly generated code.
|
|
//
|
|
// @param Builder The builder used to insert the code. It also specifies
|
|
// where to insert the code.
|
|
// @param BB The basic block to copy
|
|
// @param VMap A map returning for any old value its new equivalent. This
|
|
// is used to update the operands of the statements.
|
|
// For new statements a relation old->new is inserted in this
|
|
// map.
|
|
void copyBB(BasicBlock *BB, DominatorTree *DT) {
|
|
Function *F = Builder.GetInsertBlock()->getParent();
|
|
LLVMContext &Context = F->getContext();
|
|
BasicBlock *CopyBB = BasicBlock::Create(Context,
|
|
"polly." + BB->getNameStr()
|
|
+ ".stmt",
|
|
F);
|
|
Builder.CreateBr(CopyBB);
|
|
DT->addNewBlock(CopyBB, Builder.GetInsertBlock());
|
|
Builder.SetInsertPoint(CopyBB);
|
|
|
|
// Create two maps that store the mapping from the original instructions of
|
|
// the old basic block to their copies in the new basic block. Those maps
|
|
// are basic block local.
|
|
//
|
|
// As vector code generation is supported there is one map for scalar values
|
|
// and one for vector values.
|
|
//
|
|
// In case we just do scalar code generation, the vectorMap is not used and
|
|
// the scalarMap has just one dimension, which contains the mapping.
|
|
//
|
|
// In case vector code generation is done, an instruction may either appear
|
|
// in the vector map once (as it is calculating >vectorwidth< values at a
|
|
// time. Or (if the values are calculated using scalar operations), it
|
|
// appears once in every dimension of the scalarMap.
|
|
VectorValueMapT scalarBlockMap(getVectorSize());
|
|
ValueMapT vectorBlockMap;
|
|
|
|
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
|
|
II != IE; ++II)
|
|
for (int i = 0; i < getVectorSize(); i++) {
|
|
if (isVectorBlock())
|
|
VMap = ValueMaps[i];
|
|
|
|
copyInstruction(II, scalarBlockMap[i], vectorBlockMap,
|
|
scalarBlockMap, i, getVectorSize());
|
|
}
|
|
}
|
|
};
|
|
|
|
/// Class to generate LLVM-IR that calculates the value of a clast_expr.
|
|
class ClastExpCodeGen {
|
|
IRBuilder<> &Builder;
|
|
const CharMapT *IVS;
|
|
|
|
Value *codegen(const clast_name *e, Type *Ty) {
|
|
CharMapT::const_iterator I = IVS->find(e->name);
|
|
|
|
if (I != IVS->end())
|
|
return Builder.CreateSExtOrBitCast(I->second, Ty);
|
|
else
|
|
llvm_unreachable("Clast name not found");
|
|
}
|
|
|
|
Value *codegen(const clast_term *e, Type *Ty) {
|
|
APInt a = APInt_from_MPZ(e->val);
|
|
|
|
Value *ConstOne = ConstantInt::get(Builder.getContext(), a);
|
|
ConstOne = Builder.CreateSExtOrBitCast(ConstOne, Ty);
|
|
|
|
if (e->var) {
|
|
Value *var = codegen(e->var, Ty);
|
|
return Builder.CreateMul(ConstOne, var);
|
|
}
|
|
|
|
return ConstOne;
|
|
}
|
|
|
|
Value *codegen(const clast_binary *e, Type *Ty) {
|
|
Value *LHS = codegen(e->LHS, Ty);
|
|
|
|
APInt RHS_AP = APInt_from_MPZ(e->RHS);
|
|
|
|
Value *RHS = ConstantInt::get(Builder.getContext(), RHS_AP);
|
|
RHS = Builder.CreateSExtOrBitCast(RHS, Ty);
|
|
|
|
switch (e->type) {
|
|
case clast_bin_mod:
|
|
return Builder.CreateSRem(LHS, RHS);
|
|
case clast_bin_fdiv:
|
|
{
|
|
// floord(n,d) ((n < 0) ? (n - d + 1) : n) / d
|
|
Value *One = ConstantInt::get(Builder.getInt1Ty(), 1);
|
|
Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0);
|
|
One = Builder.CreateZExtOrBitCast(One, Ty);
|
|
Zero = Builder.CreateZExtOrBitCast(Zero, Ty);
|
|
Value *Sum1 = Builder.CreateSub(LHS, RHS);
|
|
Value *Sum2 = Builder.CreateAdd(Sum1, One);
|
|
Value *isNegative = Builder.CreateICmpSLT(LHS, Zero);
|
|
Value *Dividend = Builder.CreateSelect(isNegative, Sum2, LHS);
|
|
return Builder.CreateSDiv(Dividend, RHS);
|
|
}
|
|
case clast_bin_cdiv:
|
|
{
|
|
// ceild(n,d) ((n < 0) ? n : (n + d - 1)) / d
|
|
Value *One = ConstantInt::get(Builder.getInt1Ty(), 1);
|
|
Value *Zero = ConstantInt::get(Builder.getInt1Ty(), 0);
|
|
One = Builder.CreateZExtOrBitCast(One, Ty);
|
|
Zero = Builder.CreateZExtOrBitCast(Zero, Ty);
|
|
Value *Sum1 = Builder.CreateAdd(LHS, RHS);
|
|
Value *Sum2 = Builder.CreateSub(Sum1, One);
|
|
Value *isNegative = Builder.CreateICmpSLT(LHS, Zero);
|
|
Value *Dividend = Builder.CreateSelect(isNegative, LHS, Sum2);
|
|
return Builder.CreateSDiv(Dividend, RHS);
|
|
}
|
|
case clast_bin_div:
|
|
return Builder.CreateSDiv(LHS, RHS);
|
|
default:
|
|
llvm_unreachable("Unknown clast binary expression type");
|
|
};
|
|
}
|
|
|
|
Value *codegen(const clast_reduction *r, Type *Ty) {
|
|
assert(( r->type == clast_red_min
|
|
|| r->type == clast_red_max
|
|
|| r->type == clast_red_sum)
|
|
&& "Clast reduction type not supported");
|
|
Value *old = codegen(r->elts[0], Ty);
|
|
|
|
for (int i=1; i < r->n; ++i) {
|
|
Value *exprValue = codegen(r->elts[i], Ty);
|
|
|
|
switch (r->type) {
|
|
case clast_red_min:
|
|
{
|
|
Value *cmp = Builder.CreateICmpSLT(old, exprValue);
|
|
old = Builder.CreateSelect(cmp, old, exprValue);
|
|
break;
|
|
}
|
|
case clast_red_max:
|
|
{
|
|
Value *cmp = Builder.CreateICmpSGT(old, exprValue);
|
|
old = Builder.CreateSelect(cmp, old, exprValue);
|
|
break;
|
|
}
|
|
case clast_red_sum:
|
|
old = Builder.CreateAdd(old, exprValue);
|
|
break;
|
|
default:
|
|
llvm_unreachable("Clast unknown reduction type");
|
|
}
|
|
}
|
|
|
|
return old;
|
|
}
|
|
|
|
public:
|
|
|
|
// A generator for clast expressions.
|
|
//
|
|
// @param B The IRBuilder that defines where the code to calculate the
|
|
// clast expressions should be inserted.
|
|
// @param IVMAP A Map that translates strings describing the induction
|
|
// variables to the Values* that represent these variables
|
|
// on the LLVM side.
|
|
ClastExpCodeGen(IRBuilder<> &B, CharMapT *IVMap) : Builder(B), IVS(IVMap) {}
|
|
|
|
// Generates code to calculate a given clast expression.
|
|
//
|
|
// @param e The expression to calculate.
|
|
// @return The Value that holds the result.
|
|
Value *codegen(const clast_expr *e, Type *Ty) {
|
|
switch(e->type) {
|
|
case clast_expr_name:
|
|
return codegen((const clast_name *)e, Ty);
|
|
case clast_expr_term:
|
|
return codegen((const clast_term *)e, Ty);
|
|
case clast_expr_bin:
|
|
return codegen((const clast_binary *)e, Ty);
|
|
case clast_expr_red:
|
|
return codegen((const clast_reduction *)e, Ty);
|
|
default:
|
|
llvm_unreachable("Unknown clast expression!");
|
|
}
|
|
}
|
|
|
|
// @brief Reset the CharMap.
|
|
//
|
|
// This function is called to reset the CharMap to new one, while generating
|
|
// OpenMP code.
|
|
void setIVS(CharMapT *IVSNew) {
|
|
IVS = IVSNew;
|
|
}
|
|
|
|
};
|
|
|
|
class ClastStmtCodeGen {
|
|
// The Scop we code generate.
|
|
Scop *S;
|
|
ScalarEvolution &SE;
|
|
DominatorTree *DT;
|
|
ScopDetection *SD;
|
|
Dependences *DP;
|
|
TargetData *TD;
|
|
|
|
// The Builder specifies the current location to code generate at.
|
|
IRBuilder<> &Builder;
|
|
|
|
// Map the Values from the old code to their counterparts in the new code.
|
|
ValueMapT ValueMap;
|
|
|
|
// clastVars maps from the textual representation of a clast variable to its
|
|
// current *Value. clast variables are scheduling variables, original
|
|
// induction variables or parameters. They are used either in loop bounds or
|
|
// to define the statement instance that is executed.
|
|
//
|
|
// for (s = 0; s < n + 3; ++s)
// for (t = s; t < m; ++t)
|
|
// Stmt(i = s + 3 * m, j = t);
|
|
//
|
|
// {s,t,i,j,n,m} is the set of clast variables in this clast.
|
|
CharMapT *clastVars;
|
|
|
|
// Codegenerator for clast expressions.
|
|
ClastExpCodeGen ExpGen;
|
|
|
|
// Do we currently generate parallel code?
|
|
bool parallelCodeGeneration;
|
|
|
|
std::vector<std::string> parallelLoops;
|
|
|
|
public:
|
|
|
|
const std::vector<std::string> &getParallelLoops() {
|
|
return parallelLoops;
|
|
}
|
|
|
|
protected:
|
|
void codegen(const clast_assignment *a) {
|
|
(*clastVars)[a->LHS] = ExpGen.codegen(a->RHS,
|
|
TD->getIntPtrType(Builder.getContext()));
|
|
}
|
|
|
|
void codegen(const clast_assignment *a, ScopStmt *Statement,
|
|
unsigned Dimension, int vectorDim,
|
|
std::vector<ValueMapT> *VectorVMap = 0) {
|
|
Value *RHS = ExpGen.codegen(a->RHS,
|
|
TD->getIntPtrType(Builder.getContext()));
|
|
|
|
assert(!a->LHS && "Statement assignments do not have left hand side");
|
|
const PHINode *PN;
|
|
PN = Statement->getInductionVariableForDimension(Dimension);
|
|
const Value *V = PN;
|
|
|
|
if (VectorVMap)
|
|
(*VectorVMap)[vectorDim][V] = RHS;
|
|
|
|
ValueMap[V] = RHS;
|
|
}
|
|
|
|
void codegenSubstitutions(const clast_stmt *Assignment,
|
|
ScopStmt *Statement, int vectorDim = 0,
|
|
std::vector<ValueMapT> *VectorVMap = 0) {
|
|
int Dimension = 0;
|
|
|
|
while (Assignment) {
|
|
assert(CLAST_STMT_IS_A(Assignment, stmt_ass)
|
|
&& "Substitions are expected to be assignments");
|
|
codegen((const clast_assignment *)Assignment, Statement, Dimension,
|
|
vectorDim, VectorVMap);
|
|
Assignment = Assignment->next;
|
|
Dimension++;
|
|
}
|
|
}
|
|
|
|
void codegen(const clast_user_stmt *u, std::vector<Value*> *IVS = NULL,
|
|
const char *iterator = NULL, isl_set *scatteringDomain = 0) {
|
|
ScopStmt *Statement = (ScopStmt *)u->statement->usr;
|
|
BasicBlock *BB = Statement->getBasicBlock();
|
|
|
|
if (u->substitutions)
|
|
codegenSubstitutions(u->substitutions, Statement);
|
|
|
|
int vectorDimensions = IVS ? IVS->size() : 1;
|
|
|
|
VectorValueMapT VectorValueMap(vectorDimensions);
|
|
|
|
if (IVS) {
|
|
assert (u->substitutions && "Substitutions expected!");
|
|
int i = 0;
|
|
for (std::vector<Value*>::iterator II = IVS->begin(), IE = IVS->end();
|
|
II != IE; ++II) {
|
|
(*clastVars)[iterator] = *II;
|
|
codegenSubstitutions(u->substitutions, Statement, i, &VectorValueMap);
|
|
i++;
|
|
}
|
|
}
|
|
|
|
BlockGenerator Generator(Builder, ValueMap, VectorValueMap, *Statement,
|
|
scatteringDomain);
|
|
Generator.copyBB(BB, DT);
|
|
}
|
|
|
|
void codegen(const clast_block *b) {
|
|
if (b->body)
|
|
codegen(b->body);
|
|
}
|
|
|
|
/// @brief Create a classical sequential loop.
|
|
void codegenForSequential(const clast_for *f, Value *lowerBound = 0,
|
|
Value *upperBound = 0) {
|
|
APInt Stride = APInt_from_MPZ(f->stride);
|
|
PHINode *IV;
|
|
Value *IncrementedIV;
|
|
BasicBlock *AfterBB;
|
|
// The value of lowerbound and upperbound will be supplied, if this
|
|
// function is called while generating OpenMP code. Otherwise get
|
|
// the values.
|
|
assert(((lowerBound && upperBound) || (!lowerBound && !upperBound))
|
|
&& "Either give both bounds or none");
|
|
if (lowerBound == 0 || upperBound == 0) {
|
|
lowerBound = ExpGen.codegen(f->LB,
|
|
TD->getIntPtrType(Builder.getContext()));
|
|
upperBound = ExpGen.codegen(f->UB,
|
|
TD->getIntPtrType(Builder.getContext()));
|
|
}
|
|
createLoop(&Builder, lowerBound, upperBound, Stride, IV, AfterBB,
|
|
IncrementedIV, DT);
|
|
|
|
// Add loop iv to symbols.
|
|
(*clastVars)[f->iterator] = IV;
|
|
|
|
if (f->body)
|
|
codegen(f->body);
|
|
|
|
// Loop is finished, so remove its iv from the live symbols.
|
|
clastVars->erase(f->iterator);
|
|
|
|
BasicBlock *HeaderBB = *pred_begin(AfterBB);
|
|
BasicBlock *LastBodyBB = Builder.GetInsertBlock();
|
|
Builder.CreateBr(HeaderBB);
|
|
IV->addIncoming(IncrementedIV, LastBodyBB);
|
|
Builder.SetInsertPoint(AfterBB);
|
|
}
|
|
|
|
/// @brief Add a new definition of an openmp subfunction.
|
|
Function* addOpenMPSubfunction(Module *M) {
|
|
Function *F = Builder.GetInsertBlock()->getParent();
|
|
const std::string &Name = F->getNameStr() + ".omp_subfn";
|
|
|
|
std::vector<Type*> Arguments(1, Builder.getInt8PtrTy());
|
|
FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
|
|
Function *FN = Function::Create(FT, Function::InternalLinkage, Name, M);
|
|
// Do not run any polly pass on the new function.
|
|
SD->markFunctionAsInvalid(FN);
|
|
|
|
Function::arg_iterator AI = FN->arg_begin();
|
|
AI->setName("omp.userContext");
|
|
|
|
return FN;
|
|
}
|
|
|
|
/// @brief Add values to the OpenMP structure.
|
|
///
|
|
/// Create the subfunction structure and add the values from the list.
|
|
Value *addValuesToOpenMPStruct(SetVector<Value*> OMPDataVals,
|
|
Function *SubFunction) {
|
|
std::vector<Type*> structMembers;
|
|
|
|
// Create the structure.
|
|
for (unsigned i = 0; i < OMPDataVals.size(); i++)
|
|
structMembers.push_back(OMPDataVals[i]->getType());
|
|
|
|
StructType *structTy = StructType::get(Builder.getContext(),
|
|
structMembers);
|
|
// Store the values into the structure.
|
|
Value *structData = Builder.CreateAlloca(structTy, 0, "omp.userContext");
|
|
for (unsigned i = 0; i < OMPDataVals.size(); i++) {
|
|
Value *storeAddr = Builder.CreateStructGEP(structData, i);
|
|
Builder.CreateStore(OMPDataVals[i], storeAddr);
|
|
}
|
|
|
|
return structData;
|
|
}
|
|
|
|
/// @brief Create OpenMP structure values.
|
|
///
|
|
/// Create a list of values that has to be stored into the subfuncition
|
|
/// structure.
|
|
SetVector<Value*> createOpenMPStructValues() {
|
|
SetVector<Value*> OMPDataVals;
|
|
|
|
// Push the clast variables available in the clastVars.
|
|
for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end();
|
|
I != E; I++)
|
|
OMPDataVals.insert(I->second);
|
|
|
|
// Push the base addresses of memory references.
|
|
for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI) {
|
|
ScopStmt *Stmt = *SI;
|
|
for (SmallVector<MemoryAccess*, 8>::iterator I = Stmt->memacc_begin(),
|
|
E = Stmt->memacc_end(); I != E; ++I) {
|
|
Value *BaseAddr = const_cast<Value*>((*I)->getBaseAddr());
|
|
OMPDataVals.insert((BaseAddr));
|
|
}
|
|
}
|
|
|
|
return OMPDataVals;
|
|
}
|
|
|
|
/// @brief Extract the values from the subfunction parameter.
|
|
///
|
|
/// Extract the values from the subfunction parameter and update the clast
|
|
/// variables to point to the new values.
|
|
void extractValuesFromOpenMPStruct(CharMapT *clastVarsOMP,
|
|
SetVector<Value*> OMPDataVals,
|
|
Value *userContext) {
|
|
// Extract the clast variables.
|
|
unsigned i = 0;
|
|
for (CharMapT::iterator I = clastVars->begin(), E = clastVars->end();
|
|
I != E; I++) {
|
|
Value *loadAddr = Builder.CreateStructGEP(userContext, i);
|
|
(*clastVarsOMP)[I->first] = Builder.CreateLoad(loadAddr);
|
|
i++;
|
|
}
|
|
|
|
// Extract the base addresses of memory references.
|
|
for (unsigned j = i; j < OMPDataVals.size(); j++) {
|
|
Value *loadAddr = Builder.CreateStructGEP(userContext, j);
|
|
Value *baseAddr = OMPDataVals[j];
|
|
ValueMap[baseAddr] = Builder.CreateLoad(loadAddr);
|
|
}
|
|
|
|
}
|
|
|
|
/// @brief Add body to the subfunction.
|
|
void addOpenMPSubfunctionBody(Function *FN, const clast_for *f,
|
|
Value *structData,
|
|
SetVector<Value*> OMPDataVals) {
|
|
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
|
LLVMContext &Context = FN->getContext();
|
|
IntegerType *intPtrTy = TD->getIntPtrType(Context);
|
|
|
|
// Store the previous basic block.
|
|
BasicBlock *PrevBB = Builder.GetInsertBlock();
|
|
|
|
// Create basic blocks.
|
|
BasicBlock *HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
|
|
BasicBlock *ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
|
|
BasicBlock *checkNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
|
|
BasicBlock *loadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds",
|
|
FN);
|
|
|
|
DT->addNewBlock(HeaderBB, PrevBB);
|
|
DT->addNewBlock(ExitBB, HeaderBB);
|
|
DT->addNewBlock(checkNextBB, HeaderBB);
|
|
DT->addNewBlock(loadIVBoundsBB, HeaderBB);
|
|
|
|
// Fill up basic block HeaderBB.
|
|
Builder.SetInsertPoint(HeaderBB);
|
|
Value *lowerBoundPtr = Builder.CreateAlloca(intPtrTy, 0,
|
|
"omp.lowerBoundPtr");
|
|
Value *upperBoundPtr = Builder.CreateAlloca(intPtrTy, 0,
|
|
"omp.upperBoundPtr");
|
|
Value *userContext = Builder.CreateBitCast(FN->arg_begin(),
|
|
structData->getType(),
|
|
"omp.userContext");
|
|
|
|
CharMapT clastVarsOMP;
|
|
extractValuesFromOpenMPStruct(&clastVarsOMP, OMPDataVals, userContext);
|
|
|
|
Builder.CreateBr(checkNextBB);
|
|
|
|
// Add code to check if another set of iterations will be executed.
|
|
Builder.SetInsertPoint(checkNextBB);
|
|
Function *runtimeNextFunction = M->getFunction("GOMP_loop_runtime_next");
|
|
Value *ret1 = Builder.CreateCall2(runtimeNextFunction,
|
|
lowerBoundPtr, upperBoundPtr);
|
|
Value *hasNextSchedule = Builder.CreateTrunc(ret1, Builder.getInt1Ty(),
|
|
"omp.hasNextScheduleBlock");
|
|
Builder.CreateCondBr(hasNextSchedule, loadIVBoundsBB, ExitBB);
|
|
|
|
// Add code to to load the iv bounds for this set of iterations.
|
|
Builder.SetInsertPoint(loadIVBoundsBB);
|
|
Value *lowerBound = Builder.CreateLoad(lowerBoundPtr, "omp.lowerBound");
|
|
Value *upperBound = Builder.CreateLoad(upperBoundPtr, "omp.upperBound");
|
|
|
|
// Subtract one as the upper bound provided by openmp is a < comparison
|
|
// whereas the codegenForSequential function creates a <= comparison.
|
|
upperBound = Builder.CreateSub(upperBound, ConstantInt::get(intPtrTy, 1),
|
|
"omp.upperBoundAdjusted");
|
|
|
|
// Use clastVarsOMP during code generation of the OpenMP subfunction.
|
|
CharMapT *oldClastVars = clastVars;
|
|
clastVars = &clastVarsOMP;
|
|
ExpGen.setIVS(&clastVarsOMP);
|
|
|
|
codegenForSequential(f, lowerBound, upperBound);
|
|
|
|
// Restore the old clastVars.
|
|
clastVars = oldClastVars;
|
|
ExpGen.setIVS(oldClastVars);
|
|
|
|
Builder.CreateBr(checkNextBB);
|
|
|
|
// Add code to terminate this openmp subfunction.
|
|
Builder.SetInsertPoint(ExitBB);
|
|
Function *endnowaitFunction = M->getFunction("GOMP_loop_end_nowait");
|
|
Builder.CreateCall(endnowaitFunction);
|
|
Builder.CreateRetVoid();
|
|
|
|
// Restore the builder back to previous basic block.
|
|
Builder.SetInsertPoint(PrevBB);
|
|
}
|
|
|
|
/// @brief Create an OpenMP parallel for loop.
|
|
///
|
|
/// This loop reflects a loop as if it would have been created by an OpenMP
|
|
/// statement.
|
|
void codegenForOpenMP(const clast_for *f) {
|
|
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
|
IntegerType *intPtrTy = TD->getIntPtrType(Builder.getContext());
|
|
|
|
Function *SubFunction = addOpenMPSubfunction(M);
|
|
SetVector<Value*> OMPDataVals = createOpenMPStructValues();
|
|
Value *structData = addValuesToOpenMPStruct(OMPDataVals, SubFunction);
|
|
|
|
addOpenMPSubfunctionBody(SubFunction, f, structData, OMPDataVals);
|
|
|
|
// Create call for GOMP_parallel_loop_runtime_start.
|
|
Value *subfunctionParam = Builder.CreateBitCast(structData,
|
|
Builder.getInt8PtrTy(),
|
|
"omp_data");
|
|
|
|
Value *numberOfThreads = Builder.getInt32(0);
|
|
Value *lowerBound = ExpGen.codegen(f->LB, intPtrTy);
|
|
Value *upperBound = ExpGen.codegen(f->UB, intPtrTy);
|
|
|
|
// Add one as the upper bound provided by openmp is a < comparison
|
|
// whereas the codegenForSequential function creates a <= comparison.
|
|
upperBound = Builder.CreateAdd(upperBound, ConstantInt::get(intPtrTy, 1));
|
|
APInt APStride = APInt_from_MPZ(f->stride);
|
|
Value *stride = ConstantInt::get(intPtrTy,
|
|
APStride.zext(intPtrTy->getBitWidth()));
|
|
|
|
SmallVector<Value *, 6> Arguments;
|
|
Arguments.push_back(SubFunction);
|
|
Arguments.push_back(subfunctionParam);
|
|
Arguments.push_back(numberOfThreads);
|
|
Arguments.push_back(lowerBound);
|
|
Arguments.push_back(upperBound);
|
|
Arguments.push_back(stride);
|
|
|
|
Function *parallelStartFunction =
|
|
M->getFunction("GOMP_parallel_loop_runtime_start");
|
|
Builder.CreateCall(parallelStartFunction, Arguments);
|
|
|
|
// Create call to the subfunction.
|
|
Builder.CreateCall(SubFunction, subfunctionParam);
|
|
|
|
// Create call for GOMP_parallel_end.
|
|
Function *FN = M->getFunction("GOMP_parallel_end");
|
|
Builder.CreateCall(FN);
|
|
}
|
|
|
|
bool isInnermostLoop(const clast_for *f) {
|
|
const clast_stmt *stmt = f->body;
|
|
|
|
while (stmt) {
|
|
if (!CLAST_STMT_IS_A(stmt, stmt_user))
|
|
return false;
|
|
|
|
stmt = stmt->next;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// @brief Get the number of loop iterations for this loop.
|
|
/// @param f The clast for loop to check.
|
|
int getNumberOfIterations(const clast_for *f) {
|
|
isl_set *loopDomain = isl_set_copy(isl_set_from_cloog_domain(f->domain));
|
|
isl_set *tmp = isl_set_copy(loopDomain);
|
|
|
|
// Calculate a map similar to the identity map, but with the last input
|
|
// and output dimension not related.
|
|
// [i0, i1, i2, i3] -> [i0, i1, i2, o0]
|
|
isl_space *Space = isl_set_get_space(loopDomain);
|
|
Space = isl_space_drop_outputs(Space,
|
|
isl_set_dim(loopDomain, isl_dim_set) - 2, 1);
|
|
Space = isl_space_map_from_set(Space);
|
|
isl_map *identity = isl_map_identity(Space);
|
|
identity = isl_map_add_dims(identity, isl_dim_in, 1);
|
|
identity = isl_map_add_dims(identity, isl_dim_out, 1);
|
|
|
|
isl_map *map = isl_map_from_domain_and_range(tmp, loopDomain);
|
|
map = isl_map_intersect(map, identity);
|
|
|
|
isl_map *lexmax = isl_map_lexmax(isl_map_copy(map));
|
|
isl_map *lexmin = isl_map_lexmin(map);
|
|
isl_map *sub = isl_map_sum(lexmax, isl_map_neg(lexmin));
|
|
|
|
isl_set *elements = isl_map_range(sub);
|
|
|
|
if (!isl_set_is_singleton(elements)) {
|
|
isl_set_free(elements);
|
|
return -1;
|
|
}
|
|
|
|
isl_point *p = isl_set_sample_point(elements);
|
|
|
|
isl_int v;
|
|
isl_int_init(v);
|
|
isl_point_get_coordinate(p, isl_dim_set, isl_set_n_dim(loopDomain) - 1, &v);
|
|
int numberIterations = isl_int_get_si(v);
|
|
isl_int_clear(v);
|
|
isl_point_free(p);
|
|
|
|
return (numberIterations) / isl_int_get_si(f->stride) + 1;
|
|
}
|
|
|
|
/// @brief Create vector instructions for this loop.
|
|
void codegenForVector(const clast_for *f) {
|
|
DEBUG(dbgs() << "Vectorizing loop '" << f->iterator << "'\n";);
|
|
int vectorWidth = getNumberOfIterations(f);
|
|
|
|
Value *LB = ExpGen.codegen(f->LB,
|
|
TD->getIntPtrType(Builder.getContext()));
|
|
|
|
APInt Stride = APInt_from_MPZ(f->stride);
|
|
IntegerType *LoopIVType = dyn_cast<IntegerType>(LB->getType());
|
|
Stride = Stride.zext(LoopIVType->getBitWidth());
|
|
Value *StrideValue = ConstantInt::get(LoopIVType, Stride);
|
|
|
|
std::vector<Value*> IVS(vectorWidth);
|
|
IVS[0] = LB;
|
|
|
|
for (int i = 1; i < vectorWidth; i++)
|
|
IVS[i] = Builder.CreateAdd(IVS[i-1], StrideValue, "p_vector_iv");
|
|
|
|
isl_set *scatteringDomain = isl_set_from_cloog_domain(f->domain);
|
|
|
|
// Add loop iv to symbols.
|
|
(*clastVars)[f->iterator] = LB;
|
|
|
|
const clast_stmt *stmt = f->body;
|
|
|
|
while (stmt) {
|
|
codegen((const clast_user_stmt *)stmt, &IVS, f->iterator,
|
|
scatteringDomain);
|
|
stmt = stmt->next;
|
|
}
|
|
|
|
// Loop is finished, so remove its iv from the live symbols.
|
|
clastVars->erase(f->iterator);
|
|
}
|
|
|
|
void codegen(const clast_for *f) {
|
|
if (Vector && isInnermostLoop(f) && DP->isParallelFor(f)
|
|
&& (-1 != getNumberOfIterations(f))
|
|
&& (getNumberOfIterations(f) <= 16)) {
|
|
codegenForVector(f);
|
|
} else if (OpenMP && !parallelCodeGeneration && DP->isParallelFor(f)) {
|
|
parallelCodeGeneration = true;
|
|
parallelLoops.push_back(f->iterator);
|
|
codegenForOpenMP(f);
|
|
parallelCodeGeneration = false;
|
|
} else
|
|
codegenForSequential(f);
|
|
}
|
|
|
|
/// @brief Generate the comparison described by a clast equation.
///
/// Both sides are generated as pointer sized integers. The sign of the
/// equation selects the predicate: 0 gives "LHS == RHS", a positive sign
/// "LHS >= RHS" and a negative sign "LHS <= RHS" (both signed).
///
/// @return The result of the comparison.
Value *codegen(const clast_equation *eq) {
  IntegerType *IntPtrTy = TD->getIntPtrType(Builder.getContext());

  Value *Left = ExpGen.codegen(eq->LHS, IntPtrTy);
  Value *Right = ExpGen.codegen(eq->RHS, IntPtrTy);

  CmpInst::Predicate Pred = ICmpInst::ICMP_SLE;
  if (eq->sign == 0)
    Pred = ICmpInst::ICMP_EQ;
  else if (eq->sign > 0)
    Pred = ICmpInst::ICMP_SGE;

  return Builder.CreateICmp(Pred, Left, Right);
}
|
|
|
|
void codegen(const clast_guard *g) {
|
|
Function *F = Builder.GetInsertBlock()->getParent();
|
|
LLVMContext &Context = F->getContext();
|
|
BasicBlock *ThenBB = BasicBlock::Create(Context, "polly.then", F);
|
|
BasicBlock *MergeBB = BasicBlock::Create(Context, "polly.merge", F);
|
|
DT->addNewBlock(ThenBB, Builder.GetInsertBlock());
|
|
DT->addNewBlock(MergeBB, Builder.GetInsertBlock());
|
|
|
|
Value *Predicate = codegen(&(g->eq[0]));
|
|
|
|
for (int i = 1; i < g->n; ++i) {
|
|
Value *TmpPredicate = codegen(&(g->eq[i]));
|
|
Predicate = Builder.CreateAnd(Predicate, TmpPredicate);
|
|
}
|
|
|
|
Builder.CreateCondBr(Predicate, ThenBB, MergeBB);
|
|
Builder.SetInsertPoint(ThenBB);
|
|
|
|
codegen(g->then);
|
|
|
|
Builder.CreateBr(MergeBB);
|
|
Builder.SetInsertPoint(MergeBB);
|
|
}
|
|
|
|
void codegen(const clast_stmt *stmt) {
|
|
if (CLAST_STMT_IS_A(stmt, stmt_root))
|
|
assert(false && "No second root statement expected");
|
|
else if (CLAST_STMT_IS_A(stmt, stmt_ass))
|
|
codegen((const clast_assignment *)stmt);
|
|
else if (CLAST_STMT_IS_A(stmt, stmt_user))
|
|
codegen((const clast_user_stmt *)stmt);
|
|
else if (CLAST_STMT_IS_A(stmt, stmt_block))
|
|
codegen((const clast_block *)stmt);
|
|
else if (CLAST_STMT_IS_A(stmt, stmt_for))
|
|
codegen((const clast_for *)stmt);
|
|
else if (CLAST_STMT_IS_A(stmt, stmt_guard))
|
|
codegen((const clast_guard *)stmt);
|
|
|
|
if (stmt->next)
|
|
codegen(stmt->next);
|
|
}
|
|
|
|
void addParameters(const CloogNames *names) {
|
|
SCEVExpander Rewriter(SE, "polly");
|
|
|
|
// Create an instruction that specifies the location where the parameters
|
|
// are expanded.
|
|
CastInst::CreateIntegerCast(ConstantInt::getTrue(Builder.getContext()),
|
|
Builder.getInt16Ty(), false, "insertInst",
|
|
Builder.GetInsertBlock());
|
|
|
|
int i = 0;
|
|
for (Scop::param_iterator PI = S->param_begin(), PE = S->param_end();
|
|
PI != PE; ++PI) {
|
|
assert(i < names->nb_parameters && "Not enough parameter names");
|
|
|
|
const SCEV *Param = *PI;
|
|
Type *Ty = Param->getType();
|
|
|
|
Instruction *insertLocation = --(Builder.GetInsertBlock()->end());
|
|
Value *V = Rewriter.expandCodeFor(Param, Ty, insertLocation);
|
|
(*clastVars)[names->parameters[i]] = V;
|
|
|
|
++i;
|
|
}
|
|
}
|
|
|
|
public:
|
|
void codegen(const clast_root *r) {
|
|
clastVars = new CharMapT();
|
|
addParameters(r->names);
|
|
ExpGen.setIVS(clastVars);
|
|
|
|
parallelCodeGeneration = false;
|
|
|
|
const clast_stmt *stmt = (const clast_stmt*) r;
|
|
if (stmt->next)
|
|
codegen(stmt->next);
|
|
|
|
delete clastVars;
|
|
}
|
|
|
|
/// @brief Create a code generator for the clast statements of a Scop.
///
/// All arguments are stored for later use. The expression generator starts
/// without a table of induction variables; one is set when code generation
/// starts.
ClastStmtCodeGen(Scop *scop, ScalarEvolution &se, DominatorTree *dt,
                 ScopDetection *sd, Dependences *dp, TargetData *td,
                 IRBuilder<> &B)
  : S(scop),
    SE(se),
    DT(dt),
    SD(sd),
    DP(dp),
    TD(td),
    Builder(B),
    ExpGen(Builder, NULL) {
}
|
|
|
|
};
|
|
}
|
|
|
|
namespace {
|
|
class CodeGeneration : public ScopPass {
|
|
Region *region;
|
|
Scop *S;
|
|
DominatorTree *DT;
|
|
ScalarEvolution *SE;
|
|
ScopDetection *SD;
|
|
TargetData *TD;
|
|
RegionInfo *RI;
|
|
|
|
std::vector<std::string> parallelLoops;
|
|
|
|
public:
|
|
static char ID;
|
|
|
|
CodeGeneration() : ScopPass(ID) {}
|
|
|
|
// Adding prototypes required if OpenMP is enabled.
|
|
void addOpenMPDefinitions(IRBuilder<> &Builder)
|
|
{
|
|
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
|
LLVMContext &Context = Builder.getContext();
|
|
IntegerType *intPtrTy = TD->getIntPtrType(Context);
|
|
|
|
if (!M->getFunction("GOMP_parallel_end")) {
|
|
FunctionType *FT = FunctionType::get(Type::getVoidTy(Context), false);
|
|
Function::Create(FT, Function::ExternalLinkage, "GOMP_parallel_end", M);
|
|
}
|
|
|
|
if (!M->getFunction("GOMP_parallel_loop_runtime_start")) {
|
|
// Type of first argument.
|
|
std::vector<Type*> Arguments(1, Builder.getInt8PtrTy());
|
|
FunctionType *FnArgTy = FunctionType::get(Builder.getVoidTy(), Arguments,
|
|
false);
|
|
PointerType *FnPtrTy = PointerType::getUnqual(FnArgTy);
|
|
|
|
std::vector<Type*> args;
|
|
args.push_back(FnPtrTy);
|
|
args.push_back(Builder.getInt8PtrTy());
|
|
args.push_back(Builder.getInt32Ty());
|
|
args.push_back(intPtrTy);
|
|
args.push_back(intPtrTy);
|
|
args.push_back(intPtrTy);
|
|
|
|
FunctionType *type = FunctionType::get(Builder.getVoidTy(), args, false);
|
|
Function::Create(type, Function::ExternalLinkage,
|
|
"GOMP_parallel_loop_runtime_start", M);
|
|
}
|
|
|
|
if (!M->getFunction("GOMP_loop_runtime_next")) {
|
|
PointerType *intLongPtrTy = PointerType::getUnqual(intPtrTy);
|
|
|
|
std::vector<Type*> args;
|
|
args.push_back(intLongPtrTy);
|
|
args.push_back(intLongPtrTy);
|
|
|
|
FunctionType *type = FunctionType::get(Builder.getInt8Ty(), args, false);
|
|
Function::Create(type, Function::ExternalLinkage,
|
|
"GOMP_loop_runtime_next", M);
|
|
}
|
|
|
|
if (!M->getFunction("GOMP_loop_end_nowait")) {
|
|
FunctionType *FT = FunctionType::get(Builder.getVoidTy(),
|
|
std::vector<Type*>(), false);
|
|
Function::Create(FT, Function::ExternalLinkage,
|
|
"GOMP_loop_end_nowait", M);
|
|
}
|
|
}
|
|
|
|
// Split the entry edge of the region and generate a new basic block on this
|
|
// edge. This function also updates ScopInfo and RegionInfo.
|
|
//
|
|
// @param region The region where the entry edge will be splitted.
|
|
BasicBlock *splitEdgeAdvanced(Region *region) {
|
|
BasicBlock *newBlock;
|
|
BasicBlock *splitBlock;
|
|
|
|
newBlock = SplitEdge(region->getEnteringBlock(), region->getEntry(), this);
|
|
|
|
if (DT->dominates(region->getEntry(), newBlock)) {
|
|
// Update ScopInfo.
|
|
for (Scop::iterator SI = S->begin(), SE = S->end(); SI != SE; ++SI)
|
|
if ((*SI)->getBasicBlock() == newBlock) {
|
|
(*SI)->setBasicBlock(newBlock);
|
|
break;
|
|
}
|
|
|
|
// Update RegionInfo.
|
|
splitBlock = region->getEntry();
|
|
region->replaceEntry(newBlock);
|
|
RI->setRegionFor(newBlock, region);
|
|
} else {
|
|
RI->setRegionFor(newBlock, region->getParent());
|
|
splitBlock = newBlock;
|
|
}
|
|
|
|
return splitBlock;
|
|
}
|
|
|
|
// Create a split block that branches either to the old code or to a new basic
|
|
// block where the new code can be inserted.
|
|
//
|
|
// @param builder A builder that will be set to point to a basic block, where
|
|
// the new code can be generated.
|
|
// @return The split basic block.
|
|
BasicBlock *addSplitAndStartBlock(IRBuilder<> *builder) {
|
|
BasicBlock *splitBlock = splitEdgeAdvanced(region);
|
|
|
|
splitBlock->setName("polly.enterScop");
|
|
|
|
Function *function = splitBlock->getParent();
|
|
BasicBlock *startBlock = BasicBlock::Create(function->getContext(),
|
|
"polly.start", function);
|
|
splitBlock->getTerminator()->eraseFromParent();
|
|
builder->SetInsertPoint(splitBlock);
|
|
builder->CreateCondBr(builder->getTrue(), startBlock, region->getEntry());
|
|
DT->addNewBlock(startBlock, splitBlock);
|
|
|
|
// Start code generation here.
|
|
builder->SetInsertPoint(startBlock);
|
|
return splitBlock;
|
|
}
|
|
|
|
// Merge the control flow of the newly generated code with the existing code.
|
|
//
|
|
// @param splitBlock The basic block where the control flow was split between
|
|
// old and new version of the Scop.
|
|
// @param builder An IRBuilder that points to the last instruction of the
|
|
// newly generated code.
|
|
void mergeControlFlow(BasicBlock *splitBlock, IRBuilder<> *builder) {
|
|
BasicBlock *mergeBlock;
|
|
Region *R = region;
|
|
|
|
if (R->getExit()->getSinglePredecessor())
|
|
// No splitEdge required. A block with a single predecessor cannot have
|
|
// PHI nodes that would complicate life.
|
|
mergeBlock = R->getExit();
|
|
else {
|
|
mergeBlock = SplitEdge(R->getExitingBlock(), R->getExit(), this);
|
|
// SplitEdge will never split R->getExit(), as R->getExit() has more than
|
|
// one predecessor. Hence, mergeBlock is always a newly generated block.
|
|
mergeBlock->setName("polly.finalMerge");
|
|
R->replaceExit(mergeBlock);
|
|
}
|
|
|
|
builder->CreateBr(mergeBlock);
|
|
|
|
if (DT->dominates(splitBlock, mergeBlock))
|
|
DT->changeImmediateDominator(mergeBlock, splitBlock);
|
|
}
|
|
|
|
bool runOnScop(Scop &scop) {
|
|
S = &scop;
|
|
region = &S->getRegion();
|
|
DT = &getAnalysis<DominatorTree>();
|
|
Dependences *DP = &getAnalysis<Dependences>();
|
|
SE = &getAnalysis<ScalarEvolution>();
|
|
SD = &getAnalysis<ScopDetection>();
|
|
TD = &getAnalysis<TargetData>();
|
|
RI = &getAnalysis<RegionInfo>();
|
|
|
|
parallelLoops.clear();
|
|
|
|
assert(region->isSimple() && "Only simple regions are supported");
|
|
|
|
// In the CFG and we generate next to original code of the Scop the
|
|
// optimized version. Both the new and the original version of the code
|
|
// remain in the CFG. A branch statement decides which version is executed.
|
|
// At the moment, we always execute the newly generated version (the old one
|
|
// is dead code eliminated by the cleanup passes). Later we may decide to
|
|
// execute the new version only under certain conditions. This will be the
|
|
// case if we support constructs for which we cannot prove all assumptions
|
|
// at compile time.
|
|
//
|
|
// Before transformation:
|
|
//
|
|
// bb0
|
|
// |
|
|
// orig_scop
|
|
// |
|
|
// bb1
|
|
//
|
|
// After transformation:
|
|
// bb0
|
|
// |
|
|
// polly.splitBlock
|
|
// / \.
|
|
// | startBlock
|
|
// | |
|
|
// orig_scop new_scop
|
|
// \ /
|
|
// \ /
|
|
// bb1 (joinBlock)
|
|
IRBuilder<> builder(region->getEntry());
|
|
|
|
// The builder will be set to startBlock.
|
|
BasicBlock *splitBlock = addSplitAndStartBlock(&builder);
|
|
|
|
if (OpenMP)
|
|
addOpenMPDefinitions(builder);
|
|
|
|
ClastStmtCodeGen CodeGen(S, *SE, DT, SD, DP, TD, builder);
|
|
CloogInfo &C = getAnalysis<CloogInfo>();
|
|
CodeGen.codegen(C.getClast());
|
|
|
|
parallelLoops.insert(parallelLoops.begin(),
|
|
CodeGen.getParallelLoops().begin(),
|
|
CodeGen.getParallelLoops().end());
|
|
|
|
mergeControlFlow(splitBlock, &builder);
|
|
|
|
return true;
|
|
}
|
|
|
|
virtual void printScop(raw_ostream &OS) const {
|
|
for (std::vector<std::string>::const_iterator PI = parallelLoops.begin(),
|
|
PE = parallelLoops.end(); PI != PE; ++PI)
|
|
OS << "Parallel loop with iterator '" << *PI << "' generated\n";
|
|
}
|
|
|
|
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
|
AU.addRequired<CloogInfo>();
|
|
AU.addRequired<Dependences>();
|
|
AU.addRequired<DominatorTree>();
|
|
AU.addRequired<ScalarEvolution>();
|
|
AU.addRequired<RegionInfo>();
|
|
AU.addRequired<ScopDetection>();
|
|
AU.addRequired<ScopInfo>();
|
|
AU.addRequired<TargetData>();
|
|
|
|
AU.addPreserved<CloogInfo>();
|
|
AU.addPreserved<Dependences>();
|
|
|
|
// FIXME: We do not create LoopInfo for the newly generated loops.
|
|
AU.addPreserved<LoopInfo>();
|
|
AU.addPreserved<DominatorTree>();
|
|
AU.addPreserved<ScopDetection>();
|
|
AU.addPreserved<ScalarEvolution>();
|
|
|
|
// FIXME: We do not yet add regions for the newly generated code to the
|
|
// region tree.
|
|
AU.addPreserved<RegionInfo>();
|
|
AU.addPreserved<TempScopInfo>();
|
|
AU.addPreserved<ScopInfo>();
|
|
AU.addPreservedID(IndependentBlocksID);
|
|
}
|
|
};
|
|
}
|
|
|
|
// Definition of the static pass identifier that the CodeGeneration
// constructor passes on to ScopPass.
char CodeGeneration::ID = 1;
|
|
|
|
// Register the pass under the command line name "polly-codegen".
static RegisterPass<CodeGeneration> Z(
  "polly-codegen",
  "Polly - Create LLVM-IR from the polyhedral information");
|
|
|
|
/// @brief Create a new instance of the code generation pass.
///
/// @return A newly allocated CodeGeneration pass.
Pass* polly::createCodeGenerationPass() {
  Pass *NewPass = new CodeGeneration();
  return NewPass;
}
|