mirror of
https://github.com/intel/llvm.git
synced 2026-01-22 23:49:22 +08:00
[GPUJIT] Call cuProfilerStop before destroying the context to flush profiler cache.
This is necessary to get accurate traces from `nvprof` / `nvcc`. Otherwise, we lose some profiling information.

Differential Revision: https://reviews.llvm.org/D35940

llvm-svn: 309682
This commit is contained in:
@@ -958,6 +958,9 @@ static CuMemFreeFcnTy *CuMemFreeFcnPtr;
|
||||
typedef CUresult CUDAAPI CuModuleUnloadFcnTy(CUmodule);
|
||||
static CuModuleUnloadFcnTy *CuModuleUnloadFcnPtr;
|
||||
|
||||
/* Function type of the CUDA driver entry point that is looked up under the
 * name "cuProfilerStop". The parameter list is spelled (void) so this is a
 * real prototype: in C, empty parentheses leave the parameters unspecified
 * and turn off argument checking for calls made through the pointer. */
typedef CUresult CUDAAPI CuProfilerStopFcnTy(void);
|
||||
/* Pointer to the "cuProfilerStop" entry point. It is filled in by
 * initialDeviceAPIsCUDA() through getAPIHandleCUDA() and invoked in
 * freeContextCUDA() immediately before the context is destroyed. */
static CuProfilerStopFcnTy *CuProfilerStopFcnPtr;
|
||||
|
||||
typedef CUresult CUDAAPI CuCtxDestroyFcnTy(CUcontext);
|
||||
static CuCtxDestroyFcnTy *CuCtxDestroyFcnPtr;
|
||||
|
||||
@@ -1085,6 +1088,9 @@ static int initialDeviceAPIsCUDA() {
|
||||
CuModuleUnloadFcnPtr =
|
||||
(CuModuleUnloadFcnTy *)getAPIHandleCUDA(HandleCuda, "cuModuleUnload");
|
||||
|
||||
CuProfilerStopFcnPtr =
|
||||
(CuProfilerStopFcnTy *)getAPIHandleCUDA(HandleCuda, "cuProfilerStop");
|
||||
|
||||
CuCtxDestroyFcnPtr =
|
||||
(CuCtxDestroyFcnTy *)getAPIHandleCUDA(HandleCuda, "cuCtxDestroy");
|
||||
|
||||
@@ -1416,6 +1422,7 @@ static void freeContextCUDA(PollyGPUContext *Context) {
|
||||
|
||||
CUDAContext *Ctx = (CUDAContext *)Context->Context;
|
||||
if (Ctx->Cuda) {
|
||||
CuProfilerStopFcnPtr();
|
||||
CuCtxDestroyFcnPtr(Ctx->Cuda);
|
||||
free(Ctx);
|
||||
free(Context);
|
||||
|
||||
Reference in New Issue
Block a user