From 442e722c1e09a5370728ba7b307d90d28e9b4fa1 Mon Sep 17 00:00:00 2001 From: Siddharth Bhat Date: Tue, 1 Aug 2017 14:36:24 +0000 Subject: [PATCH] [GPUJIT] Call `cuProfilerStop` before destroying the context to flush profiler cache. This is necessary to get accurate traces from `nvprof` / `nvvp`. Otherwise, we lose some profiling information. Differential Revision: https://reviews.llvm.org/D35940 llvm-svn: 309682 --- polly/tools/GPURuntime/GPUJIT.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c index 02a91a7913d3..a26bc7c01b2d 100644 --- a/polly/tools/GPURuntime/GPUJIT.c +++ b/polly/tools/GPURuntime/GPUJIT.c @@ -958,6 +958,9 @@ static CuMemFreeFcnTy *CuMemFreeFcnPtr; typedef CUresult CUDAAPI CuModuleUnloadFcnTy(CUmodule); static CuModuleUnloadFcnTy *CuModuleUnloadFcnPtr; +typedef CUresult CUDAAPI CuProfilerStopFcnTy(); +static CuProfilerStopFcnTy *CuProfilerStopFcnPtr; + typedef CUresult CUDAAPI CuCtxDestroyFcnTy(CUcontext); static CuCtxDestroyFcnTy *CuCtxDestroyFcnPtr; @@ -1085,6 +1088,9 @@ static int initialDeviceAPIsCUDA() { CuModuleUnloadFcnPtr = (CuModuleUnloadFcnTy *)getAPIHandleCUDA(HandleCuda, "cuModuleUnload"); + CuProfilerStopFcnPtr = + (CuProfilerStopFcnTy *)getAPIHandleCUDA(HandleCuda, "cuProfilerStop"); + CuCtxDestroyFcnPtr = (CuCtxDestroyFcnTy *)getAPIHandleCUDA(HandleCuda, "cuCtxDestroy"); @@ -1416,6 +1422,7 @@ static void freeContextCUDA(PollyGPUContext *Context) { CUDAContext *Ctx = (CUDAContext *)Context->Context; if (Ctx->Cuda) { + CuProfilerStopFcnPtr(); CuCtxDestroyFcnPtr(Ctx->Cuda); free(Ctx); free(Context);