[OpenMP] Add info for device table changes

Summary:
This patch adds a feature to print information whenever the host-device pointer
mapping table is changed by inserting or removing an entry. This introduces a
new bit field for LIBOMPTARGET_INFO at position 0x8.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D100600
This commit is contained in:
Joseph Huber
2021-04-15 17:27:17 -04:00
committed by Huber, Joseph
parent afec953857
commit 83d4b2e2e0
4 changed files with 39 additions and 14 deletions

View File

@@ -87,6 +87,7 @@ with `-g` for full debug information. A full list of flags supported by
* Indicate when a mapped address already exists in the device mapping table:
``0x02``
* Dump the contents of the device pointer map at kernel exit: ``0x04``
* Indicate when an entry is changed in the device mapping table: ``0x08``
* Print OpenMP kernel information from device plugins: ``0x10``
Any combination of these flags can be used by setting the appropriate bits. For
@@ -140,6 +141,10 @@ provide the following output from the runtime library.
Info: Entering OpenMP data region at zaxpy.cpp:14:1 with 2 arguments:
Info: to(X[0:N])[16384]
Info: tofrom(Y[0:N])[16384]
Info: Creating new map entry with HstPtrBegin=0x00007fff963f4000,
TgtPtrBegin=0x00007fff963f4000, Size=16384, Name=X[0:N]
Info: Creating new map entry with HstPtrBegin=0x00007fff963f8000,
TgtPtrBegin=0x00007fff963f00000, Size=16384, Name=Y[0:N]
Info: OpenMP Host-Device pointer mappings after block at zaxpy.cpp:14:1:
Info: Host Ptr Target Ptr Size (B) RefCount Declaration
Info: 0x00007fff963f4000 0x00007fd225004000 16384 1 Y[0:N] at zaxpy.cpp:13:17
@@ -151,10 +156,14 @@ provide the following output from the runtime library.
Info: use_address(X)[0] (implicit)
Info: Mapping exists (implicit) with HstPtrBegin=0x00007ffe37d8be80,
TgtPtrBegin=0x00007f90ff004000, Size=0, updated RefCount=2, Name=Y
Info: Creating new map entry with HstPtrBegin=0x00007fff963f33ff0,
TgtPtrBegin=0x00007fd225003ff0, Size=16, Name=D
Info: Mapping exists (implicit) with HstPtrBegin=0x00007ffe37d8fe80,
TgtPtrBegin=0x00007f90ff000000, Size=0, updated RefCount=2, Name=X
Info: Launching kernel __omp_offloading_fd02_c2c4ac1a__Z5daxpyPNSt3__17complexIdEES2_S1_m_l6
with 8 blocks and 128 threads in SPMD mode
Info: Removing map entry with HstPtrBegin=0x00007fff963f33ff0,
TgtPtrBegin=0x00007fd225003ff0, Size=16, Name=D
Info: OpenMP Host-Device pointer mappings after block at zaxpy.cpp:6:1:
Info: Host Ptr Target Ptr Size (B) RefCount Declaration
Info: 0x00007fff963f4000 0x00007fd225004000 16384 1 Y[0:N] at zaxpy.cpp:13:17
@@ -162,6 +171,10 @@ provide the following output from the runtime library.
Info: Exiting OpenMP data region at zaxpy.cpp:14:1 with 2 arguments:
Info: to(X[0:N])[16384]
Info: tofrom(Y[0:N])[16384]
Info: Removing map entry with HstPtrBegin=0x00007fff963f4000,
TgtPtrBegin=0x00007fff963f4000, Size=16384, Name=X[0:N]
Info: Removing map entry with HstPtrBegin=0x00007fff963f8000,
TgtPtrBegin=0x00007fff963f00000, Size=16384, Name=Y[0:N]
From this information, we can see the OpenMP kernel being launched on the CUDA
device with enough threads and blocks for all ``1024`` iterations of the loop in

View File

@@ -47,6 +47,8 @@ enum OpenMPInfoType : uint32_t {
OMP_INFOTYPE_MAPPING_EXISTS = 0x0002,
// Dump the contents of the device pointer map at kernel exit or failure.
OMP_INFOTYPE_DUMP_TABLE = 0x0004,
// Indicate when an address is added to the device mapping table.
OMP_INFOTYPE_MAPPING_CHANGED = 0x0008,
// Print kernel information from target device plugins.
OMP_INFOTYPE_PLUGIN_KERNEL = 0x0010,
// Enable every flag.

View File

@@ -264,10 +264,11 @@ void *DeviceTy::getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase,
// If it is not contained and Size > 0, we should create a new entry for it.
IsNew = true;
uintptr_t tp = (uintptr_t)allocData(Size, HstPtrBegin);
DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD ", "
"HstEnd=" DPxMOD ", TgtBegin=" DPxMOD "\n",
DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin),
DPxPTR((uintptr_t)HstPtrBegin + Size), DPxPTR(tp));
INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
"Creating new map entry with "
"HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, Name=%s\n",
DPxPTR(HstPtrBegin), DPxPTR(tp), Size,
(HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
HostDataToTargetMap.emplace(
HostDataToTargetTy((uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
(uintptr_t)HstPtrBegin + Size, tp, HstPtrName));
@@ -351,10 +352,13 @@ int DeviceTy::deallocTgtPtr(void *HstPtrBegin, int64_t Size, bool ForceDelete,
DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
DPxPTR(HT.TgtPtrBegin), Size);
deleteData((void *)HT.TgtPtrBegin);
DP("Removing%s mapping with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
", Size=%" PRId64 "\n",
(ForceDelete ? " (forced)" : ""), DPxPTR(HT.HstPtrBegin),
DPxPTR(HT.TgtPtrBegin), Size);
INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
"Removing%s map entry with HstPtrBegin=" DPxMOD
", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", Name=%s\n",
(ForceDelete ? " (forced)" : ""), DPxPTR(HT.HstPtrBegin),
DPxPTR(HT.TgtPtrBegin), Size,
(HT.HstPtrName) ? getNameFromMapping(HT.HstPtrName).c_str()
: "unknown");
HostDataToTargetMap.erase(lr.Entry);
}
rc = OFFLOAD_SUCCESS;

View File

@@ -1,4 +1,4 @@
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -gline-tables-only && env LIBOMPTARGET_INFO=23 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
// RUN: %libomptarget-compile-nvptx64-nvidia-cuda -gline-tables-only && env LIBOMPTARGET_INFO=31 %libomptarget-run-nvptx64-nvidia-cuda 2>&1 | %fcheck-nvptx64-nvidia-cuda -allow-empty -check-prefix=INFO
#include <stdio.h>
#include <omp.h>
@@ -12,24 +12,30 @@ int main() {
int val = 1;
// INFO: CUDA device 0 info: Device supports up to {{.*}} CUDA blocks and {{.*}} threads with a warp size of {{.*}}
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:33:1 with 3 arguments:
// INFO: Libomptarget device 0 info: Entering OpenMP data region at info.c:39:1 with 3 arguments:
// INFO: Libomptarget device 0 info: alloc(A[0:64])[256]
// INFO: Libomptarget device 0 info: tofrom(B[0:64])[256]
// INFO: Libomptarget device 0 info: to(C[0:64])[256]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:33:1:
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=A[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Creating new map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:39:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: {{.*}} {{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:34:1 with 1 arguments:
// INFO: Libomptarget device 0 info: Entering OpenMP kernel at info.c:40:1 with 1 arguments:
// INFO: Libomptarget device 0 info: firstprivate(val)[4]
// INFO: CUDA device 0 info: Launching kernel {{.*}} with {{.*}} and {{.*}} threads in {{.*}} mode
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:34:1:
// INFO: Libomptarget device 0 info: OpenMP Host-Device pointer mappings after block at info.c:40:1:
// INFO: Libomptarget device 0 info: Host Ptr Target Ptr Size (B) RefCount Declaration
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 C[0:64] at info.c:11:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 B[0:64] at info.c:10:7
// INFO: Libomptarget device 0 info: 0x{{.*}} 0x{{.*}} 256 1 A[0:64] at info.c:9:7
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:33:1
// INFO: Libomptarget device 0 info: Exiting OpenMP data region at info.c:39:1
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=C[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=B[0:64]
// INFO: Libomptarget device 0 info: Removing map entry with HstPtrBegin={{.*}}, TgtPtrBegin={{.*}}, Size=256, Name=A[0:64]
#pragma omp target data map(alloc:A[0:N]) map(tofrom:B[0:N]) map(to:C[0:N])
#pragma omp target firstprivate(val)
{ val = 1; }