diff --git a/polly/tools/GPURuntime/GPUJIT.c b/polly/tools/GPURuntime/GPUJIT.c index 02a91a7913d3..a26bc7c01b2d 100644 --- a/polly/tools/GPURuntime/GPUJIT.c +++ b/polly/tools/GPURuntime/GPUJIT.c @@ -958,6 +958,9 @@ static CuMemFreeFcnTy *CuMemFreeFcnPtr; typedef CUresult CUDAAPI CuModuleUnloadFcnTy(CUmodule); static CuModuleUnloadFcnTy *CuModuleUnloadFcnPtr; +typedef CUresult CUDAAPI CuProfilerStopFcnTy(); +static CuProfilerStopFcnTy *CuProfilerStopFcnPtr; + typedef CUresult CUDAAPI CuCtxDestroyFcnTy(CUcontext); static CuCtxDestroyFcnTy *CuCtxDestroyFcnPtr; @@ -1085,6 +1088,9 @@ static int initialDeviceAPIsCUDA() { CuModuleUnloadFcnPtr = (CuModuleUnloadFcnTy *)getAPIHandleCUDA(HandleCuda, "cuModuleUnload"); + CuProfilerStopFcnPtr = + (CuProfilerStopFcnTy *)getAPIHandleCUDA(HandleCuda, "cuProfilerStop"); + CuCtxDestroyFcnPtr = (CuCtxDestroyFcnTy *)getAPIHandleCUDA(HandleCuda, "cuCtxDestroy"); @@ -1416,6 +1422,7 @@ static void freeContextCUDA(PollyGPUContext *Context) { CUDAContext *Ctx = (CUDAContext *)Context->Context; if (Ctx->Cuda) { + CuProfilerStopFcnPtr(); CuCtxDestroyFcnPtr(Ctx->Cuda); free(Ctx); free(Context);