mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 03:56:16 +08:00
[OPENMP, NVPTX] Emit correct thread id.
We emitted fake thread id for the outined function in NVPTX codegen. Patch adds emission of the real thread id. llvm-svn: 327867
This commit is contained in:
@@ -268,6 +268,10 @@ protected:
|
||||
void emitCall(CodeGenFunction &CGF, SourceLocation Loc, llvm::Value *Callee,
|
||||
ArrayRef<llvm::Value *> Args = llvm::None) const;
|
||||
|
||||
/// \brief Emits address of the word in a memory where current thread id is
|
||||
/// stored.
|
||||
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
|
||||
|
||||
private:
|
||||
/// \brief Default const ident_t object used for initialization of all other
|
||||
/// ident_t objects.
|
||||
@@ -564,10 +568,6 @@ private:
|
||||
/// \return Cache variable for the specified threadprivate.
|
||||
llvm::Constant *getOrCreateThreadPrivateCache(const VarDecl *VD);
|
||||
|
||||
/// \brief Emits address of the word in a memory where current thread id is
|
||||
/// stored.
|
||||
virtual Address emitThreadIDAddress(CodeGenFunction &CGF, SourceLocation Loc);
|
||||
|
||||
/// \brief Gets (if variable with the given name already exist) or creates
|
||||
/// internal global variable with the specified Name. The created variable has
|
||||
/// linkage CommonLinkage by default and is initialized by null value.
|
||||
|
||||
@@ -608,7 +608,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericEntryHeader(CodeGenFunction &CGF,
|
||||
Bld.CreateCondBr(IsWorker, WorkerBB, MasterCheckBB);
|
||||
|
||||
CGF.EmitBlock(WorkerBB);
|
||||
emitOutlinedFunctionCall(CGF, WST.Loc, WST.WorkerFn);
|
||||
emitCall(CGF, WST.Loc, WST.WorkerFn);
|
||||
CGF.EmitBranch(EST.ExitBB);
|
||||
|
||||
CGF.EmitBlock(MasterCheckBB);
|
||||
@@ -831,10 +831,9 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF,
|
||||
// Insert call to work function via shared wrapper. The shared
|
||||
// wrapper takes two arguments:
|
||||
// - the parallelism level;
|
||||
// - the master thread ID;
|
||||
emitOutlinedFunctionCall(CGF, WST.Loc, W,
|
||||
{Bld.getInt16(/*ParallelLevel=*/0),
|
||||
getMasterThreadID(CGF)});
|
||||
// - the thread ID;
|
||||
emitCall(CGF, WST.Loc, W,
|
||||
{Bld.getInt16(/*ParallelLevel=*/0), getThreadID(CGF, WST.Loc)});
|
||||
|
||||
// Go to end of parallel region.
|
||||
CGF.EmitBranch(TerminateBB);
|
||||
@@ -1316,12 +1315,12 @@ void CGOpenMPRuntimeNVPTX::emitTeamsCall(CodeGenFunction &CGF,
|
||||
if (!CGF.HaveInsertPoint())
|
||||
return;
|
||||
|
||||
Address ZeroAddr =
|
||||
CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
|
||||
/*Name*/ ".zero.addr");
|
||||
Address ZeroAddr = CGF.CreateMemTemp(
|
||||
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
|
||||
/*Name*/ ".zero.addr");
|
||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||
@@ -1350,7 +1349,7 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
|
||||
// Force inline this outlined function at its call site.
|
||||
Fn->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||
|
||||
auto &&L0ParallelGen = [this, WFn, &CapturedVars](CodeGenFunction &CGF,
|
||||
auto &&L0ParallelGen = [this, WFn, CapturedVars](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
|
||||
@@ -1420,17 +1419,20 @@ void CGOpenMPRuntimeNVPTX::emitGenericParallelCall(
|
||||
auto *ThreadID = getThreadID(CGF, Loc);
|
||||
llvm::Value *Args[] = {RTLoc, ThreadID};
|
||||
|
||||
auto &&SeqGen = [this, Fn, &CapturedVars, &Args, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
auto &&CodeGen = [this, Fn, &CapturedVars, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
auto &&SeqGen = [this, Fn, CapturedVars, Args, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &) {
|
||||
auto &&CodeGen = [this, Fn, CapturedVars, Loc](CodeGenFunction &CGF,
|
||||
PrePostActionTy &Action) {
|
||||
Action.Enter(CGF);
|
||||
|
||||
llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
|
||||
OutlinedFnArgs.push_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
OutlinedFnArgs.push_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
Address ZeroAddr =
|
||||
CGF.CreateMemTemp(CGF.getContext().getIntTypeForBitwidth(
|
||||
/*DestWidth=*/32, /*Signed=*/1),
|
||||
".zero.addr");
|
||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, Loc, Fn, OutlinedFnArgs);
|
||||
};
|
||||
@@ -1468,7 +1470,7 @@ void CGOpenMPRuntimeNVPTX::emitSpmdParallelCall(
|
||||
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
|
||||
".zero.addr");
|
||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.push_back(emitThreadIDAddress(CGF, Loc).getPointer());
|
||||
OutlinedFnArgs.push_back(ZeroAddr.getPointer());
|
||||
OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());
|
||||
emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);
|
||||
@@ -2873,14 +2875,15 @@ llvm::Function *CGOpenMPRuntimeNVPTX::createParallelDataSharingWrapper(
|
||||
const auto *RD = CS.getCapturedRecordDecl();
|
||||
auto CurField = RD->field_begin();
|
||||
|
||||
Address ZeroAddr = CGF.CreateMemTemp(
|
||||
CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
|
||||
/*Name*/ ".zero.addr");
|
||||
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
||||
// Get the array of arguments.
|
||||
SmallVector<llvm::Value *, 8> Args;
|
||||
|
||||
// TODO: suppport SIMD and pass actual values
|
||||
Args.emplace_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
Args.emplace_back(
|
||||
llvm::ConstantPointerNull::get(CGM.Int32Ty->getPointerTo()));
|
||||
Args.emplace_back(CGF.GetAddrOfLocalVar(&WrapperArg).getPointer());
|
||||
Args.emplace_back(ZeroAddr.getPointer());
|
||||
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
auto CI = CS.capture_begin();
|
||||
|
||||
@@ -105,7 +105,7 @@ int main (int argc, char **argv) {
|
||||
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
|
||||
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
|
||||
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
|
||||
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
|
||||
// CK2: store i{{[0-9]+}} [[ARGC_IN]], i{{[0-9]+}}* [[ARGCADDR]],
|
||||
@@ -129,7 +129,7 @@ int main (int argc, char **argv) {
|
||||
// CK2: [[AADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CK2: [[BADDR:%.+]] = alloca i{{[0-9]+}},
|
||||
// CK2: [[ARGCADDR:%.+]] = alloca i{{[0-9]+}}**,
|
||||
// CK2-NOT: {{%.+}} = call i32 @__kmpc_global_thread_num(
|
||||
// CK2: {{%.+}} = call i32 @__kmpc_global_thread_num(
|
||||
// CK2: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[AADDR]],
|
||||
// CK2: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[BADDR]],
|
||||
// CK2: store i{{[0-9]+}}** [[ARGC]], i{{[0-9]+}}*** [[ARGCADDR]],
|
||||
|
||||
Reference in New Issue
Block a user