[GPUJIT] Call cuProfilerStop before destroying the context to flush profiler cache.

This is necessary to get accurate traces from NVIDIA's profilers (`nvprof` / `nvvp`).
Otherwise, profiling data that has not yet been flushed when the context is destroyed is lost.

Differential Revision: https://reviews.llvm.org/D35940

llvm-svn: 309682
This commit is contained in:
Siddharth Bhat
2017-08-01 14:36:24 +00:00
parent edf9581e4c
commit 442e722c1e

View File

@@ -958,6 +958,9 @@ static CuMemFreeFcnTy *CuMemFreeFcnPtr;
/* Function type for cuModuleUnload and the pointer that holds its handle;
 * the pointer is filled in by initialDeviceAPIsCUDA() via getAPIHandleCUDA(). */
typedef CUresult CUDAAPI CuModuleUnloadFcnTy(CUmodule);
static CuModuleUnloadFcnTy *CuModuleUnloadFcnPtr;
/* Function type for cuProfilerStop and the pointer that holds its handle.
 * freeContextCUDA() calls through this pointer immediately before it
 * destroys the context.
 * NOTE(review): the empty parameter list `()` leaves the parameters
 * unspecified in C prior to C23; `(void)` would make this a real prototype.
 * Confirm against the CUDA header's declaration before changing it. */
typedef CUresult CUDAAPI CuProfilerStopFcnTy();
static CuProfilerStopFcnTy *CuProfilerStopFcnPtr;
/* Function type for cuCtxDestroy and the pointer that holds its handle;
 * invoked by freeContextCUDA() with the context stored in the CUDAContext. */
typedef CUresult CUDAAPI CuCtxDestroyFcnTy(CUcontext);
static CuCtxDestroyFcnTy *CuCtxDestroyFcnPtr;
@@ -1085,6 +1088,9 @@ static int initialDeviceAPIsCUDA() {
CuModuleUnloadFcnPtr =
(CuModuleUnloadFcnTy *)getAPIHandleCUDA(HandleCuda, "cuModuleUnload");
CuProfilerStopFcnPtr =
(CuProfilerStopFcnTy *)getAPIHandleCUDA(HandleCuda, "cuProfilerStop");
CuCtxDestroyFcnPtr =
(CuCtxDestroyFcnTy *)getAPIHandleCUDA(HandleCuda, "cuCtxDestroy");
@@ -1416,6 +1422,7 @@ static void freeContextCUDA(PollyGPUContext *Context) {
CUDAContext *Ctx = (CUDAContext *)Context->Context;
if (Ctx->Cuda) {
CuProfilerStopFcnPtr();
CuCtxDestroyFcnPtr(Ctx->Cuda);
free(Ctx);
free(Context);