mirror of
https://github.com/intel/llvm.git
synced 2026-02-08 00:50:03 +08:00
Make the argument passing in the first-class aggregate (FCA) case smarter
still, by avoiding building the FCA at all when the types line up exactly.
For example, before this change we generated:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=3]
%2 = insertvalue %struct.DeclGroup undef, i64 %0, 0 ; <%struct.DeclGroup> [#uses=1]
%3 = insertvalue %struct.DeclGroup %2, i64 %1, 1 ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %3, %struct.DeclGroup* %D
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
... which has the pointless insertvalue instructions, which fast-isel hates.
Now we generate:
%struct.DeclGroup = type { i64, i64 }
define i64 @_Z3foo9DeclGroup(i64, i64) nounwind {
entry:
%D = alloca %struct.DeclGroup, align 8 ; <%struct.DeclGroup*> [#uses=4]
%2 = getelementptr %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
store i64 %0, i64* %2
%3 = getelementptr %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
store i64 %1, i64* %3
%tmp = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 0 ; <i64*> [#uses=1]
%tmp1 = load i64* %tmp ; <i64> [#uses=1]
%tmp2 = getelementptr inbounds %struct.DeclGroup* %D, i32 0, i32 1 ; <i64*> [#uses=1]
%tmp3 = load i64* %tmp2 ; <i64> [#uses=1]
%add = add nsw i64 %tmp1, %tmp3 ; <i64> [#uses=1]
ret i64 %add
}
This only kicks in when the x86-64 ABI lowering picks a coerce-to struct type that exactly matches the type of the argument's memory slot.
llvm-svn: 107104
This commit is contained in:
@@ -894,29 +894,41 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
|
||||
continue;
|
||||
|
||||
case ABIArgInfo::Coerce: {
|
||||
// If the coerce-to type is a first class aggregate, we flatten it and
|
||||
// pass the elements. Either way is semantically identical, but fast-isel
|
||||
// and the optimizer generally likes scalar values better than FCAs.
|
||||
llvm::Value *FormalArg;
|
||||
if (const llvm::StructType *STy =
|
||||
dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
|
||||
// Reconstruct the FCA here.
|
||||
// FIXME: If we have a direct match, do nice gep/store series.
|
||||
FormalArg = llvm::UndefValue::get(STy);
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
||||
FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
|
||||
}
|
||||
} else {
|
||||
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
||||
FormalArg = AI++;
|
||||
}
|
||||
|
||||
// FIXME: This is very wasteful; EmitParmDecl is just going to drop the
|
||||
// result in a new alloca anyway, so we could just store into that
|
||||
// directly if we broke the abstraction down more.
|
||||
llvm::Value *V = CreateMemTemp(Ty, "coerce");
|
||||
CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
|
||||
|
||||
// If the coerce-to type is a first class aggregate, we flatten it and
|
||||
// pass the elements. Either way is semantically identical, but fast-isel
|
||||
// and the optimizer generally likes scalar values better than FCAs.
|
||||
if (const llvm::StructType *STy =
|
||||
dyn_cast<llvm::StructType>(ArgI.getCoerceToType())) {
|
||||
// If the argument and alloca types match up, we don't have to build the
|
||||
// FCA at all, emit a series of GEPs and stores, which is better for
|
||||
// fast isel.
|
||||
if (STy == cast<llvm::PointerType>(V->getType())->getElementType()) {
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
||||
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(V, 0, i);
|
||||
Builder.CreateStore(AI++, EltPtr);
|
||||
}
|
||||
} else {
|
||||
// Reconstruct the FCA here so we can do a coerced store.
|
||||
llvm::Value *FormalArg = llvm::UndefValue::get(STy);
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
||||
FormalArg = Builder.CreateInsertValue(FormalArg, AI++, i);
|
||||
}
|
||||
CreateCoercedStore(FormalArg, V, /*DestIsVolatile=*/false, *this);
|
||||
}
|
||||
} else {
|
||||
// Simple case, just do a coerced store of the argument into the alloca.
|
||||
assert(AI != Fn->arg_end() && "Argument mismatch!");
|
||||
CreateCoercedStore(AI++, V, /*DestIsVolatile=*/false, *this);
|
||||
}
|
||||
|
||||
|
||||
// Match to what EmitParmDecl is expecting for this type.
|
||||
if (!CodeGenFunction::hasAggregateLLVMType(Ty)) {
|
||||
V = EmitLoadOfScalar(V, false, Ty);
|
||||
@@ -1116,19 +1128,32 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
|
||||
} else
|
||||
SrcPtr = RV.getAggregateAddr();
|
||||
|
||||
llvm::Value *SrcVal =
|
||||
CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
|
||||
|
||||
// If the coerce-to type is a first class aggregate, we flatten it and
|
||||
// pass the elements. Either way is semantically identical, but fast-isel
|
||||
// and the optimizer generally likes scalar values better than FCAs.
|
||||
if (const llvm::StructType *STy =
|
||||
dyn_cast<llvm::StructType>(SrcVal->getType())) {
|
||||
// Extract the elements of the value to pass in.
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
|
||||
Args.push_back(Builder.CreateExtractValue(SrcVal, i));
|
||||
dyn_cast<llvm::StructType>(ArgInfo.getCoerceToType())) {
|
||||
// If the argument and alloca types match up, we don't have to build the
|
||||
// FCA at all, emit a series of GEPs and loads, which is better for
|
||||
// fast isel.
|
||||
if (STy ==cast<llvm::PointerType>(SrcPtr->getType())->getElementType()){
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
|
||||
llvm::Value *EltPtr = Builder.CreateConstGEP2_32(SrcPtr, 0, i);
|
||||
Args.push_back(Builder.CreateLoad(EltPtr));
|
||||
}
|
||||
} else {
|
||||
// Otherwise, do a coerced load of the entire FCA and handle the pieces.
|
||||
llvm::Value *SrcVal =
|
||||
CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(), *this);
|
||||
|
||||
// Extract the elements of the value to pass in.
|
||||
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
|
||||
Args.push_back(Builder.CreateExtractValue(SrcVal, i));
|
||||
}
|
||||
} else {
|
||||
Args.push_back(SrcVal);
|
||||
// In the simple case, just pass the coerced loaded value.
|
||||
Args.push_back(CreateCoercedLoad(SrcPtr, ArgInfo.getCoerceToType(),
|
||||
*this));
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user