//===--------- device.cpp - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Functionality for managing devices that are handled by RTL plugins.
//
//===----------------------------------------------------------------------===//

#include "device.h"
#include "omptarget.h"
#include "private.h"
#include "rtl.h"

#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <string>
#include <thread>

int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device,
                                            AsyncInfoTy &AsyncInfo) const {
  // First, check if the user disabled atomic map transfer/malloc/dealloc.
  if (!PM->UseEventsForAtomicTransfers)
    return OFFLOAD_SUCCESS;

  void *Event = getEvent();
  bool NeedNewEvent = Event == nullptr;
  if (NeedNewEvent && Device.createEvent(&Event) != OFFLOAD_SUCCESS) {
    REPORT("Failed to create event\n");
    return OFFLOAD_FAIL;
  }

  // We cannot assume the event is non-null because we do not know whether the
  // target supports events. If a target does not, recordEvent should always
  // return success.
  if (Device.recordEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
    REPORT("Failed to set dependence on event " DPxMOD "\n", DPxPTR(Event));
    return OFFLOAD_FAIL;
  }

  if (NeedNewEvent)
    setEvent(Event);

  return OFFLOAD_SUCCESS;
}
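
// Within this file, getTargetPointer (below) invokes this method right after
// issuing a host-to-device copy, so that other threads touching the same
// entry can later wait on the recorded event. A condensed sketch of that
// call pattern (the error handling here is hypothetical):
//
//   int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
//   if (Ret == OFFLOAD_SUCCESS &&
//       Entry->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS)
//     return OFFLOAD_FAIL;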

DeviceTy::DeviceTy(RTLInfoTy *RTL)
    : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(),
      HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(),
      PendingGlobalsMtx(), ShadowMtx() {}

DeviceTy::~DeviceTy() {
  if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE))
    return;

  ident_t Loc = {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
  dumpTargetPointerMappings(&Loc, *this);
}
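
// Note: getInfoLevel() reflects the LIBOMPTARGET_INFO environment variable.
// A hedged usage sketch (shell; assumes the documented encoding in which -1
// enables all info bits, including OMP_INFOTYPE_DUMP_TABLE):
//
//   LIBOMPTARGET_INFO=-1 ./a.out   # dump the mapping table on device teardown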

int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) {
  HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();

  // Check if the entry exists.
  auto It = HDTTMap->find(HstPtrBegin);
  if (It != HDTTMap->end()) {
    HostDataToTargetTy &HDTT = *It->HDTT;
    // The mapping already exists.
    bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size &&
                   HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin;
    if (IsValid) {
      DP("Attempt to re-associate the same device ptr+offset with the same "
         "host ptr, nothing to do\n");
      return OFFLOAD_SUCCESS;
    }
    REPORT("Not allowed to re-associate a different device ptr+offset with "
           "the same host ptr\n");
    return OFFLOAD_FAIL;
  }

  // The mapping does not exist; allocate it with an infinite reference count.
  const HostDataToTargetTy &NewEntry =
      *HDTTMap
           ->emplace(new HostDataToTargetTy(
               /*HstPtrBase=*/(uintptr_t)HstPtrBegin,
               /*HstPtrBegin=*/(uintptr_t)HstPtrBegin,
               /*HstPtrEnd=*/(uintptr_t)HstPtrBegin + Size,
               /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin,
               /*UseHoldRefCount=*/false, /*Name=*/nullptr,
               /*IsRefCountINF=*/true))
           .first->HDTT;
  DP("Creating new map entry: HstBase=" DPxMOD ", HstBegin=" DPxMOD
     ", HstEnd=" DPxMOD ", TgtBegin=" DPxMOD ", DynRefCount=%s, "
     "HoldRefCount=%s\n",
     DPxPTR(NewEntry.HstPtrBase), DPxPTR(NewEntry.HstPtrBegin),
     DPxPTR(NewEntry.HstPtrEnd), DPxPTR(NewEntry.TgtPtrBegin),
     NewEntry.dynRefCountToStr().c_str(), NewEntry.holdRefCountToStr().c_str());
  (void)NewEntry;

  // Notify the plugin about the new mapping.
  return notifyDataMapped(HstPtrBegin, Size);
}
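
// associatePtr backs the user-facing omp_target_associate_ptr API. A minimal,
// hypothetical host-side sketch of how an application reaches this code:
//
//   void *DevPtr = omp_target_alloc(Size, DeviceNum);
//   omp_target_associate_ptr(HstPtr, DevPtr, Size, /*device_offset=*/0,
//                            DeviceNum); // creates an entry with RefCount=INF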

int DeviceTy::disassociatePtr(void *HstPtrBegin) {
  HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();

  auto It = HDTTMap->find(HstPtrBegin);
  if (It != HDTTMap->end()) {
    HostDataToTargetTy &HDTT = *It->HDTT;
    // The mapping exists.
    if (HDTT.getHoldRefCount()) {
      // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
      // "It is an error to call acc_unmap_data if the structured reference
      // count for the pointer is not zero."
      REPORT("Trying to disassociate a pointer with a non-zero hold reference "
             "count\n");
    } else if (HDTT.isDynRefCountInf()) {
      DP("Association found, removing it\n");
      void *Event = HDTT.getEvent();
      delete &HDTT;
      if (Event)
        destroyEvent(Event);
      HDTTMap->erase(It);

      // Notify the plugin about the unmapped memory.
      return notifyDataUnmapped(HstPtrBegin);
    } else {
      REPORT("Trying to disassociate a pointer which was not mapped via "
             "omp_target_associate_ptr\n");
    }
  } else {
    REPORT("Association not found\n");
  }

  // Mapping not found, or it exists but cannot be disassociated.
  return OFFLOAD_FAIL;
}
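
// disassociatePtr backs omp_target_disassociate_ptr. Continuing the sketch
// above (hypothetical host code):
//
//   omp_target_disassociate_ptr(HstPtr, DeviceNum); // removes the INF entry
//   omp_target_free(DevPtr, DeviceNum);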

LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap,
                                     void *HstPtrBegin, int64_t Size) {
  uintptr_t HP = (uintptr_t)HstPtrBegin;
  LookupResult LR;

  DP("Looking up mapping(HstPtrBegin=" DPxMOD ", Size=%" PRId64 ")...\n",
     DPxPTR(HP), Size);

  if (HDTTMap->empty())
    return LR;

  auto Upper = HDTTMap->upper_bound(HP);

  if (Size == 0) {
    // See OpenMP specification v5.1, "Pointer Initialization for Device Data
    // Environments". upper_bound satisfies
    //   std::prev(Upper)->HDTT.HstPtrBegin <= HP < Upper->HDTT.HstPtrBegin
    if (Upper != HDTTMap->begin()) {
      LR.Entry = std::prev(Upper)->HDTT;
      auto &HT = *LR.Entry;
      // The left side of the extended address range is satisfied:
      //   HP >= HT.HstPtrBegin || HP >= HT.HstPtrBase
      LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase;
    }

    if (!LR.Flags.IsContained && Upper != HDTTMap->end()) {
      LR.Entry = Upper->HDTT;
      auto &HT = *LR.Entry;
      // The right side of the extended address range is satisfied:
      //   HP < HT.HstPtrEnd || HP < HT.HstPtrBase
      LR.Flags.IsContained = HP >= HT.HstPtrBase;
    }
  } else {
    // Check the left bin.
    if (Upper != HDTTMap->begin()) {
      LR.Entry = std::prev(Upper)->HDTT;
      auto &HT = *LR.Entry;
      // Is it contained?
      LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd &&
                             (HP + Size) <= HT.HstPtrEnd;
      // Does it extend beyond the mapped region?
      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
    }

    // Check the right bin.
    if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) &&
        Upper != HDTTMap->end()) {
      LR.Entry = Upper->HDTT;
      auto &HT = *LR.Entry;
      // Does it extend into an already mapped region?
      LR.Flags.ExtendsBefore =
          HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin;
      // Does it extend beyond the mapped region?
      LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd;
    }

    if (LR.Flags.ExtendsBefore) {
      DP("WARNING: Pointer is not mapped but section extends into already "
         "mapped data\n");
    }
    if (LR.Flags.ExtendsAfter) {
      DP("WARNING: Pointer is already mapped but section extends beyond mapped "
         "region\n");
    }
  }

  return LR;
}
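
// A worked example of the lookup logic above, with hypothetical addresses:
// assume one entry with HstPtrBegin=0x1000 and HstPtrEnd=0x1010 (16 bytes).
//
//   lookupMapping(Map, (void *)0x1004, 8);  // IsContained: [0x1004, 0x100c)
//   lookupMapping(Map, (void *)0x100c, 8);  // ExtendsAfter: ends at 0x1014
//   lookupMapping(Map, (void *)0x0ff8, 16); // ExtendsBefore: starts unmapped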

TargetPointerResultTy DeviceTy::getTargetPointer(
    void *HstPtrBegin, void *HstPtrBase, int64_t Size,
    map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways,
    bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier,
    bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) {
  HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();

  void *TargetPointer = nullptr;
  bool IsHostPtr = false;
  bool IsPresent = true;
  bool IsNew = false;

  LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
  auto *Entry = LR.Entry;

  // Check if the pointer is contained.
  // If a variable is mapped to the device manually by the user - which would
  // cause the IsContained flag to be true - then we must ensure that the
  // device address is returned even under unified memory conditions.
  if (LR.Flags.IsContained ||
      ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) {
    auto &HT = *LR.Entry;
    const char *RefCountAction;
    if (UpdateRefCount) {
      // After this, reference count >= 1. If the reference count was 0 but the
      // entry was still there we can reuse the data on the device and avoid a
      // new submission.
      HT.incRefCount(HasHoldModifier);
      RefCountAction = " (incremented)";
    } else {
      // It might have been allocated with the parent, but it's still new.
      IsNew = HT.getTotalRefCount() == 1;
      RefCountAction = " (update suppressed)";
    }
    const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction;
    const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : "";
    uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
    INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
         "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
         ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
         (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr),
         Size, HT.dynRefCountToStr().c_str(), DynRefCountAction,
         HT.holdRefCountToStr().c_str(), HoldRefCountAction,
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;
  } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) {
    // Explicit extension of mapped data - not allowed.
    MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
            " (%" PRId64
            " bytes), but device allocation maps to host at " DPxMOD
            " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin),
            Entry->HstPtrEnd - Entry->HstPtrBegin);
    if (HasPresentModifier)
      MESSAGE("device mapping required by 'present' map type modifier does not "
              "exist for host address " DPxMOD " (%" PRId64 " bytes)",
              DPxPTR(HstPtrBegin), Size);
  } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY &&
             !HasCloseModifier) {
    // If unified shared memory is active, implicitly mapped variables that are
    // not privatized use the host address. Any explicitly mapped variables
    // also use the host address where correctness is not impeded. In all other
    // cases maps are respected.
    // In addition to the mapping rules above, the close map modifier forces
    // the mapping of the variable to the device.
    if (Size) {
      DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
         "memory\n",
         DPxPTR((uintptr_t)HstPtrBegin), Size);
      IsPresent = false;
      IsHostPtr = true;
      TargetPointer = HstPtrBegin;
    }
  } else if (HasPresentModifier) {
    DP("Mapping required by 'present' map type modifier does not exist for "
       "HstPtrBegin=" DPxMOD ", Size=%" PRId64 "\n",
       DPxPTR(HstPtrBegin), Size);
    MESSAGE("device mapping required by 'present' map type modifier does not "
            "exist for host address " DPxMOD " (%" PRId64 " bytes)",
            DPxPTR(HstPtrBegin), Size);
  } else if (Size) {
    // If it is not contained and Size > 0, we should create a new entry.
    IsNew = true;
    uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin);
    Entry = HDTTMap
                ->emplace(new HostDataToTargetTy(
                    (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin,
                    (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier,
                    HstPtrName))
                .first->HDTT;
    INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
         "Creating new map entry with HstPtrBase=" DPxMOD
         ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", "
         "DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
         DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size,
         Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(),
         (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown");
    TargetPointer = (void *)Ptr;

    // Notify the plugin about the new mapping.
    if (notifyDataMapped(HstPtrBegin, Size))
      return {{false /* IsNewEntry */, false /* IsHostPointer */},
              nullptr /* Entry */,
              nullptr /* TargetPointer */};
  } else {
    // This entry is not present and we did not create a new entry for it.
    IsPresent = false;
  }

  // If the target pointer is valid, and we need to transfer data, issue the
  // data transfer.
  if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways) &&
      Size != 0) {
    // Lock the entry before releasing the mapping table lock such that another
    // thread that could issue data movement will get the right result. Issuing
    // the data movement outside the mapping-table critical region leaves
    // threads that touch other entries unaffected; any thread touching this
    // entry must first take the entry lock and therefore observes a consistent
    // data-movement state.
    std::lock_guard<decltype(*Entry)> LG(*Entry);
    // Release the mapping table lock right after the entry is locked.
    HDTTMap.destroy();

    DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size,
       DPxPTR(HstPtrBegin), DPxPTR(TargetPointer));

    int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo);
    if (Ret != OFFLOAD_SUCCESS) {
      REPORT("Copying data to device failed.\n");
      // We also return nullptr if the data movement fails: the pointer would
      // refer to a corrupted memory region, so it makes no sense to continue
      // using it.
      TargetPointer = nullptr;
    } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS)
      return {{false /* IsNewEntry */, false /* IsHostPointer */},
              nullptr /* Entry */,
              nullptr /* TargetPointer */};
  } else {
    // Release the mapping table lock directly.
    HDTTMap.destroy();
    // If not a host pointer and no present modifier, we need to wait for the
    // event if it exists.
    // Note: Entry might be nullptr because of a zero-length array section.
    if (Entry && !IsHostPtr && !HasPresentModifier) {
      std::lock_guard<decltype(*Entry)> LG(*Entry);
      void *Event = Entry->getEvent();
      if (Event) {
        int Ret = waitEvent(Event, AsyncInfo);
        if (Ret != OFFLOAD_SUCCESS) {
          // If waiting for the event fails, we need to return nullptr to
          // avoid any data race.
          REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event));
          return {{false /* IsNewEntry */, false /* IsHostPointer */},
                  nullptr /* Entry */,
                  nullptr /* TargetPointer */};
        }
      }
    }
  }

  return {{IsNew, IsHostPtr, IsPresent}, Entry, TargetPointer};
}
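
// A hedged sketch of how a caller might consume the result (field names are
// inferred from the initializer comments above; the real call sites live in
// targetDataBegin and friends, not in this file):
//
//   TargetPointerResultTy TPR = Device.getTargetPointer(/*...*/);
//   if (!TPR.TargetPointer)
//     ; // lookup, allocation, or transfer failed
//   else if (TPR.Flags.IsNewEntry)
//     ; // a fresh device allocation was created for this mapping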
|
|
|
|
|
|
2021-11-03 11:52:21 -05:00
|
|
|
TargetPointerResultTy
|
|
|
|
|
DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast,
|
|
|
|
|
bool UpdateRefCount, bool UseHoldRefCount,
|
2023-01-19 12:11:20 -03:00
|
|
|
bool &IsHostPtr, bool MustContain, bool ForceDelete,
|
|
|
|
|
bool FromDataEnd) {
|
2022-03-05 15:14:20 -06:00
|
|
|
HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
|
|
|
|
|
|
2021-11-03 11:52:21 -05:00
|
|
|
void *TargetPointer = NULL;
|
|
|
|
|
bool IsNew = false;
|
2022-09-07 13:37:26 -05:00
|
|
|
bool IsPresent = true;
|
2019-08-07 17:29:45 +00:00
|
|
|
IsHostPtr = false;
|
|
|
|
|
IsLast = false;
|
2022-07-01 11:48:15 -04:00
|
|
|
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
2017-12-06 21:59:09 +00:00
|
|
|
|
2022-07-01 11:48:15 -04:00
|
|
|
if (LR.Flags.IsContained ||
|
|
|
|
|
(!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) {
|
|
|
|
|
auto &HT = *LR.Entry;
|
2021-08-31 15:21:16 -04:00
|
|
|
IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete);
|
2022-03-02 13:46:01 -06:00
|
|
|
|
|
|
|
|
if (ForceDelete) {
|
2021-08-31 15:21:16 -04:00
|
|
|
HT.resetRefCount(UseHoldRefCount);
|
|
|
|
|
assert(IsLast == HT.decShouldRemove(UseHoldRefCount) &&
|
[OpenMP] Fix delete map type in ref count debug messages
For example, without this patch:
```
$ cat test.c
int main() {
int x;
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target exit data map(delete: x)
;
return 0;
}
$ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c
$ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists\|last'
Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=1, Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=3 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (decremented)
Libomptarget --> There are 4 bytes allocated at target address 0x00000000013bb040 - is not last
```
`RefCount` is reported as decremented to 2, but it ought to be reset
because of the `delete` map type, and `is not last` is incorrect.
This patch migrates the reset of reference counts from
`DeviceTy::deallocTgtPtr` to `DeviceTy::getTgtPtrBegin`, which then
correctly reports the reset. Based on the `IsLast` result from
`DeviceTy::getTgtPtrBegin`, `targetDataEnd` then correctly reports `is
last` for any deletion. `DeviceTy::deallocTgtPtr` is responsible only
for the final reference count decrement and mapping removal.
An obscure side effect of this patch is that a `delete` map type when
the reference count is infinite yields `DelEntry=IsLast=false` in
`targetDataEnd` and so no longer results in a
`DeviceTy::deallocTgtPtr` call. Without this patch, that call is a
no-op anyway besides some unnecessary locking and mapping table
lookups.
Reviewed By: grokos
Differential Revision: https://reviews.llvm.org/D104560
2021-06-23 09:39:04 -04:00
|
|
|
"expected correct IsLast prediction for reset");
|
2022-03-02 13:46:01 -06:00
|
|
|
}
|
|
|
|
|
|
2023-01-19 12:11:20 -03:00
|
|
|
// Increment the number of threads that is using the entry on a
|
|
|
|
|
// targetDataEnd, tracking the number of possible "deleters". A thread may
|
|
|
|
|
// come to own the entry deletion even if it was not the last one querying
|
|
|
|
|
// for it. Thus, we must track every query on targetDataEnds to ensure only
|
|
|
|
|
// the last thread that holds a reference to an entry actually deletes it.
|
|
|
|
|
if (FromDataEnd)
|
|
|
|
|
HT.incDataEndThreadCount();
|
|
|
|
|
|
2022-03-02 13:46:01 -06:00
|
|
|
const char *RefCountAction;
|
|
|
|
|
if (!UpdateRefCount) {
|
|
|
|
|
RefCountAction = " (update suppressed)";
|
[OpenMP] Fix delete map type in ref count debug messages
For example, without this patch:
```
$ cat test.c
int main() {
int x;
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target exit data map(delete: x)
;
return 0;
}
$ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c
$ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists\|last'
Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=1, Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=3 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (decremented)
Libomptarget --> There are 4 bytes allocated at target address 0x00000000013bb040 - is not last
```
`RefCount` is reported as decremented to 2, but it ought to be reset
because of the `delete` map type, and `is not last` is incorrect.
This patch migrates the reset of reference counts from
`DeviceTy::deallocTgtPtr` to `DeviceTy::getTgtPtrBegin`, which then
correctly reports the reset. Based on the `IsLast` result from
`DeviceTy::getTgtPtrBegin`, `targetDataEnd` then correctly reports `is
last` for any deletion. `DeviceTy::deallocTgtPtr` is responsible only
for the final reference count decrement and mapping removal.
An obscure side effect of this patch is that a `delete` map type when
the reference count is infinite yields `DelEntry=IsLast=false` in
`targetDataEnd` and so no longer results in a
`DeviceTy::deallocTgtPtr` call. Without this patch, that call is a
no-op anyway besides some unnecessary locking and mapping table
lookups.
Reviewed By: grokos
Differential Revision: https://reviews.llvm.org/D104560
2021-06-23 09:39:04 -04:00
|
|
|
} else if (IsLast) {
|
2022-03-02 13:46:01 -06:00
|
|
|
HT.decRefCount(UseHoldRefCount);
|
|
|
|
|
assert(HT.getTotalRefCount() == 0 &&
|
|
|
|
|
"Expected zero reference count when deletion is scheduled");
|
|
|
|
|
if (ForceDelete)
|
|
|
|
|
RefCountAction = " (reset, delayed deletion)";
|
|
|
|
|
else
|
|
|
|
|
RefCountAction = " (decremented, delayed deletion)";
|
[OpenMP] Fix delete map type in ref count debug messages
For example, without this patch:
```
$ cat test.c
int main() {
int x;
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target enter data map(alloc: x)
#pragma omp target exit data map(delete: x)
;
return 0;
}
$ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c
$ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists\|last'
Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=1, Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=3 (incremented), Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffddf1eaea8, TgtPtrBegin=0x00000000013bb040, Size=4, RefCount=2 (decremented)
Libomptarget --> There are 4 bytes allocated at target address 0x00000000013bb040 - is not last
```
`RefCount` is reported as decremented to 2, but it ought to be reset
because of the `delete` map type, and `is not last` is incorrect.
This patch migrates the reset of reference counts from
`DeviceTy::deallocTgtPtr` to `DeviceTy::getTgtPtrBegin`, which then
correctly reports the reset. Based on the `IsLast` result from
`DeviceTy::getTgtPtrBegin`, `targetDataEnd` then correctly reports `is
last` for any deletion. `DeviceTy::deallocTgtPtr` is responsible only
for the final reference count decrement and mapping removal.
An obscure side effect of this patch is that a `delete` map type when
the reference count is infinite yields `DelEntry=IsLast=false` in
`targetDataEnd` and so no longer results in a
`DeviceTy::deallocTgtPtr` call. Without this patch, that call is a
no-op anyway besides some unnecessary locking and mapping table
lookups.
Reviewed By: grokos
Differential Revision: https://reviews.llvm.org/D104560
2021-06-23 09:39:04 -04:00
|
|
|
} else {
|
2021-08-31 15:21:16 -04:00
|
|
|
HT.decRefCount(UseHoldRefCount);
|
|
|
|
|
RefCountAction = " (decremented)";
|
[OpenMP] Improve ref count debug messages
For example, without this patch:
```
$ cat test.c
int main() {
int x;
#pragma omp target enter data map(alloc: x)
#pragma omp target exit data map(release: x)
;
return 0;
}
$ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda test.c
$ LIBOMPTARGET_DEBUG=1 ./a.out |& grep 'Creating\|Mapping exists'
Libomptarget --> Creating new map entry with HstPtrBegin=0x00007ffcace8e448, TgtPtrBegin=0x00007f12ef600000, Size=4, Name=unknown
Libomptarget --> Mapping exists with HstPtrBegin=0x00007ffcace8e448, TgtPtrBegin=0x00007f12ef600000, Size=4, updated RefCount=1
```
There are two problems in this example:
* `RefCount` is not reported when a mapping is created, but it might
be 1 or infinite. In this case, because it's created by `omp target
enter data`, it's 1. Seeing that would make later `RefCount`
messages easier to understand.
* `RefCount` is still 1 at the `omp target exit data`, but it's
reported as `updated`. The reason it's still 1 is that, upon
deletions, the reference count is generally not updated in
`DeviceTy::getTgtPtrBegin`, where the report is produced. Instead,
it's zeroed later in `DeviceTy::deallocTgtPtr`, where it's actually
removed from the mapping table.
This patch makes the following changes:
* Report the reference count when creating a mapping.
* Where an existing mapping is reported, always report a reference
count action:
* `update suppressed` when `UpdateRefCount=false`
* `incremented`
* `decremented`
* `deferred final decrement`, which replaces the misleading
`updated` in the above example
* Add comments to `DeviceTy::getTgtPtrBegin` to explain why it does
not zero the reference count. (Please advise if these comments miss
the point.)
* For unified shared memory, don't report confusing messages like
`RefCount=` or `RefCount= updated` given that reference counts are
irrelevant in this case. Instead, just report `for unified shared
memory`.
* Use `INFO` not `DP` consistently for `Mapping exists` messages.
* Fix device table dumps to print `INF` instead of `-1` for an
infinite reference count.
Reviewed By: jhuber6, grokos
Differential Revision: https://reviews.llvm.org/D104559
2021-06-23 09:37:54 -04:00
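The code below reports two separate counters: `DynRefCount` for ordinary map clauses and `HoldRefCount` for LLVM's `ompx_hold` map-type modifier. A minimal sketch of how the two counters diverge, assuming the `ompx_hold` extension is enabled with `-fopenmp-extensions`:
```
// Sketch: the hold and dynamic reference counts are tracked independently,
// and the mapping stays present while either count is nonzero.
int main() {
  int x;
#pragma omp target data map(ompx_hold, alloc: x) // HoldRefCount=1
  {
#pragma omp target enter data map(alloc: x)      // DynRefCount=1
#pragma omp target exit data map(release: x)     // DynRefCount decremented
  } // HoldRefCount drops to 0; the mapping can be removed here
  return 0;
}
```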
|
|
|
}
|
2021-08-31 15:21:16 -04:00
|
|
|
const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction;
|
|
|
|
|
const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : "";
|
2022-07-01 11:48:15 -04:00
|
|
|
uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin);
|
2021-06-23 09:37:54 -04:00
|
|
|
INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID,
|
|
|
|
|
"Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", "
|
2021-08-31 15:21:16 -04:00
|
|
|
"Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n",
|
2022-07-01 11:48:15 -04:00
|
|
|
DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(),
|
2021-08-31 15:21:16 -04:00
|
|
|
DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction);
|
2022-07-01 11:48:15 -04:00
|
|
|
TargetPointer = (void *)TP;
|
2020-10-30 01:04:34 -04:00
|
|
|
} else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) {
|
2019-08-07 17:29:45 +00:00
|
|
|
// If the value isn't found in the mapping and unified shared memory
|
|
|
|
|
// is on then it means we have stumbled upon a value which we need to
|
|
|
|
|
// use directly from the host.
|
2021-06-23 09:37:54 -04:00
|
|
|
DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared "
|
|
|
|
|
"memory\n",
|
|
|
|
|
DPxPTR((uintptr_t)HstPtrBegin), Size);
|
2022-09-07 13:37:26 -05:00
|
|
|
IsPresent = false;
|
2019-08-07 17:29:45 +00:00
|
|
|
IsHostPtr = true;
|
2021-11-03 11:52:21 -05:00
|
|
|
TargetPointer = HstPtrBegin;
|
2022-09-07 13:37:26 -05:00
|
|
|
} else {
|
|
|
|
|
// OpenMP Specification v5.2: if a matching list item is not found, the
|
|
|
|
|
// pointer retains its original value as per firstprivate semantics.
|
|
|
|
|
IsPresent = false;
|
|
|
|
|
IsHostPtr = false;
|
|
|
|
|
TargetPointer = HstPtrBegin;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2022-09-07 13:37:26 -05:00
|
|
|
return {{IsNew, IsHostPtr, IsPresent}, LR.Entry, TargetPointer};
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Return the target pointer begin (where the data will be moved).
|
2022-03-05 15:14:20 -06:00
|
|
|
void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
|
|
|
|
|
int64_t Size) {
|
2022-07-01 11:48:15 -04:00
|
|
|
uintptr_t HP = (uintptr_t)HstPtrBegin;
|
|
|
|
|
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
|
|
|
|
if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) {
|
|
|
|
|
auto &HT = *LR.Entry;
|
|
|
|
|
uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin);
|
|
|
|
|
return (void *)TP;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
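The pointer arithmetic above preserves intra-object offsets: an interior host pointer translates to the same offset inside the target allocation. A standalone illustration with invented addresses:
```
#include <cstdint>
#include <cstdio>

// Illustration only (made-up addresses): a lookup of an interior host pointer
// returns the target base plus the same offset into the mapped object.
int main() {
  uintptr_t HstPtrBegin = 0x1000, TgtPtrBegin = 0x9000; // mapped entry
  uintptr_t HP = 0x1010;                                // interior host pointer
  uintptr_t TP = TgtPtrBegin + (HP - HstPtrBegin);      // same offset on device
  std::printf("TP = 0x%llx\n", (unsigned long long)TP); // prints TP = 0x9010
  return 0;
}
```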
|
|
|
|
|
|
2023-01-19 12:11:20 -03:00
|
|
|
int DeviceTy::eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
|
|
|
|
|
HostDataToTargetTy *Entry, int64_t Size) {
|
|
|
|
|
assert(Entry && "Trying to delete a null entry from the HDTT map.");
|
|
|
|
|
assert(Entry->getTotalRefCount() == 0 && Entry->getDataEndThreadCount() == 0 &&
|
2022-03-02 13:46:01 -06:00
|
|
|
"Trying to delete entry that is in use or owned by another thread.");
|
|
|
|
|
|
|
|
|
|
INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID,
|
|
|
|
|
"Removing map entry with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD
|
|
|
|
|
", Size=%" PRId64 ", Name=%s\n",
|
2023-01-19 12:11:20 -03:00
|
|
|
DPxPTR(Entry->HstPtrBegin), DPxPTR(Entry->TgtPtrBegin), Size,
|
|
|
|
|
(Entry->HstPtrName) ? getNameFromMapping(Entry->HstPtrName).c_str()
|
|
|
|
|
: "unknown");
|
|
|
|
|
|
|
|
|
|
if (HDTTMap->erase(Entry) == 0) {
|
|
|
|
|
REPORT("Trying to remove a non-existent map entry\n");
|
|
|
|
|
return OFFLOAD_FAIL;
|
|
|
|
|
}
|
2022-03-02 13:46:01 -06:00
|
|
|
|
2023-01-19 12:11:20 -03:00
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy *Entry, int64_t Size) {
|
|
|
|
|
assert(Entry && "Trying to deallocate a null entry.");
|
|
|
|
|
|
|
|
|
|
DP("Deleting tgt data " DPxMOD " of size %" PRId64 "\n",
|
|
|
|
|
DPxPTR(Entry->TgtPtrBegin), Size);
|
|
|
|
|
|
|
|
|
|
void *Event = Entry->getEvent();
|
2022-03-02 13:46:01 -06:00
|
|
|
if (Event && destroyEvent(Event) != OFFLOAD_SUCCESS) {
|
|
|
|
|
REPORT("Failed to destroy event " DPxMOD "\n", DPxPTR(Event));
|
2023-01-19 12:11:20 -03:00
|
|
|
return OFFLOAD_FAIL;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2023-01-19 12:11:20 -03:00
|
|
|
int Ret = deleteData((void *)Entry->TgtPtrBegin);
|
2023-01-25 01:04:07 +01:00
|
|
|
|
|
|
|
|
// Notify the plugin about the unmapped memory.
|
|
|
|
|
Ret |= notifyDataUnmapped((void *)Entry->HstPtrBegin);
|
|
|
|
|
|
2023-01-19 12:11:20 -03:00
|
|
|
delete Entry;
|
|
|
|
|
|
2022-01-05 20:19:55 -05:00
|
|
|
return Ret;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Init device, should not be called directly.
|
|
|
|
|
void DeviceTy::init() {
|
2019-06-04 15:05:53 +00:00
|
|
|
// Make call to init_requires if it exists for this plugin.
|
|
|
|
|
if (RTL->init_requires)
|
2020-10-30 01:04:34 -04:00
|
|
|
RTL->init_requires(PM->RTLs.RequiresFlags);
|
[OpenMP] Introduce target memory manager
This patch introduces a target memory manager, which aims to manage target
memory so that it is not freed immediately when no longer in use, because the
overhead of memory allocation and free is very large. For CUDA devices,
cuMemFree even blocks context switching on the device, which hurts concurrent
kernel execution.
The memory manager can be thought of as a memory pool. It divides the pool into
multiple buckets by size, so that allocations and frees distributed to
different buckets do not affect each other.
In this version, we use an exact-equality policy to find a free buffer. Whether
best-fit would work better here is an open question. IMO, best-fit is not good
for target memory management because GPU computation usually requires GBs of
data, so best-fit can lead to serious waste. For example, suppose there is a
free buffer of size 1960MB and we need a buffer of size 1200MB: with best-fit,
the free buffer is returned, wasting 760MB.
Allocation happens when there is no suitable free buffer left, and memory is
actually freed on the device in the following two cases:
1. The program ends. However, there is a subtle problem: the plugin library is
destroyed before the memory manager, so calls into the target plugin will no
longer succeed.
2. The device is out of memory when we request a new allocation. The manager
then walks through all free buffers, starting from the bucket with the largest
base size, picks one buffer, frees it, and immediately retries the allocation.
If that succeeds, it returns right away rather than freeing every buffer in the
free lists.
Update:
A threshold (8KB by default) is set so that users can control what sizes of
memory are managed by the manager. It can also be configured via the
environment variable `LIBOMPTARGET_MEMORY_MANAGER_THRESHOLD`.
Reviewed By: jdoerfert, ye-luo, JonChesterfield
Differential Revision: https://reviews.llvm.org/D81054
2020-08-19 23:12:02 -04:00
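A minimal sketch of the bucketed, exact-fit pooling policy described above; all names are invented for illustration, this is not the libomptarget implementation, and the threshold semantics (allocations larger than the threshold bypass the pool) are an assumption:
```
#include <cstddef>
#include <cstdlib>
#include <map>
#include <mutex>
#include <vector>

// Invented illustration of an exact-fit, bucketed memory pool. std::malloc
// and std::free stand in for the plugin's device allocation entry points.
class SimpleMemoryPool {
  std::map<size_t, std::vector<void *>> FreeBuckets; // size -> free buffers
  std::mutex Mtx;
  size_t Threshold; // allocations above this are not pooled (assumed)

public:
  explicit SimpleMemoryPool(size_t Threshold = 8192) : Threshold(Threshold) {}

  void *allocate(size_t Size) {
    if (Size > Threshold)
      return std::malloc(Size); // too large to manage: allocate directly
    std::lock_guard<std::mutex> LG(Mtx);
    auto It = FreeBuckets.find(Size); // exact-equality policy, not best-fit
    if (It != FreeBuckets.end() && !It->second.empty()) {
      void *Ptr = It->second.back();
      It->second.pop_back();
      return Ptr;
    }
    return std::malloc(Size); // no exact match: allocate a new buffer
  }

  void deallocate(void *Ptr, size_t Size) {
    if (Size > Threshold) {
      std::free(Ptr);
      return;
    }
    std::lock_guard<std::mutex> LG(Mtx);
    FreeBuckets[Size].push_back(Ptr); // keep the buffer for later reuse
  }
};
```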
|
|
|
int32_t Ret = RTL->init_device(RTLDeviceID);
|
|
|
|
|
if (Ret != OFFLOAD_SUCCESS)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
IsInit = true;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Thread-safe method to initialize the device only once.
|
|
|
|
|
int32_t DeviceTy::initOnce() {
|
|
|
|
|
std::call_once(InitFlag, &DeviceTy::init, this);
|
|
|
|
|
|
|
|
|
|
// At this point, if IsInit is true, then either this thread or some other
|
|
|
|
|
// thread in the past successfully initialized the device, so we can return
|
|
|
|
|
// OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
|
|
|
|
|
// failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
|
|
|
|
|
// that some other thread already attempted to execute init() and if IsInit
|
|
|
|
|
// is still false, return OFFLOAD_FAIL.
|
|
|
|
|
if (IsInit)
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
2022-07-01 11:48:15 -04:00
|
|
|
return OFFLOAD_FAIL;
|
2017-12-06 21:59:09 +00:00
|
|
|
}
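A standalone illustration (not libomptarget code) of the `std::call_once` pattern used above; the separate boolean is needed because `std::call_once` by itself does not report whether the initializer succeeded:
```
#include <cstdio>
#include <mutex>

// Invented example: only one thread ever runs init(); IsInit records whether
// that single attempt succeeded, so every caller can report success/failure.
struct Once {
  std::once_flag InitFlag;
  bool IsInit = false;
  void init() { IsInit = true; } // a failing init() would leave IsInit false
  int initOnce() {
    std::call_once(InitFlag, &Once::init, this);
    return IsInit ? 0 : 1; // 0 ~ OFFLOAD_SUCCESS, 1 ~ OFFLOAD_FAIL
  }
};

int main() {
  Once O;
  std::printf("initOnce() = %d\n", O.initOnce()); // prints 0
  return 0;
}
```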
|
|
|
|
|
|
2022-02-17 16:04:12 -06:00
|
|
|
void DeviceTy::deinit() {
|
|
|
|
|
if (RTL->deinit_device)
|
|
|
|
|
RTL->deinit_device(RTLDeviceID);
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-06 21:59:09 +00:00
|
|
|
// Load binary to device.
|
2022-07-01 11:48:15 -04:00
|
|
|
__tgt_target_table *DeviceTy::loadBinary(void *Img) {
|
2022-03-02 13:34:24 -06:00
|
|
|
std::lock_guard<decltype(RTL->Mtx)> LG(RTL->Mtx);
|
2022-07-01 11:48:15 -04:00
|
|
|
return RTL->load_binary(RTLDeviceID, Img);
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2021-03-03 11:48:32 -08:00
|
|
|
void *DeviceTy::allocData(int64_t Size, void *HstPtr, int32_t Kind) {
|
|
|
|
|
return RTL->data_alloc(RTLDeviceID, Size, HstPtr, Kind);
|
2020-07-27 16:08:19 -04:00
|
|
|
}
|
|
|
|
|
|
2022-08-31 15:55:14 -05:00
|
|
|
int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) {
|
|
|
|
|
return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind);
|
2020-07-27 16:08:19 -04:00
|
|
|
}
|
|
|
|
|
|
[OpenMP] Optimized stream selection by scheduling data mapping for the same target region into a same stream
Summary:
This patch introduces two things for offloading:
1. Asynchronous data transfers: these functions are suffixed with `_async` and take one more argument than their synchronous counterparts: a `__tgt_async_info*`, a new struct with a single field, `void *Identifier`. The struct exchanges information between asynchronous operations. It can be used for stream selection, as in this case, and for operation synchronization, which is also used here. We may expect more usages in the future.
2. Optimized stream selection for data mapping. The previous implementation used asynchronous device memory transfers but synchronized after each one. If kernel A needs four memory copies to the device and two copies back to the host, we can instead schedule all seven operations (four H2D, two D2H, and one kernel launch) into the same stream and only synchronize after the copies from device to host. This avoids substantial overhead compared with synchronizing after every operation.
Reviewers: jdoerfert, ye-luo
Reviewed By: jdoerfert
Subscribers: yaxunl, lildmh, guansong, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D77005
2020-04-07 14:51:56 -04:00
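A hedged sketch of the call pattern this enables, expressed with this file's own interfaces (argument setup and error handling are assumed to happen elsewhere): queue every transfer and the launch on one `AsyncInfoTy`, then block exactly once.
```
// Sketch only: four H2D copies, one kernel launch, and two D2H copies are
// queued on a single AsyncInfoTy; the sole blocking point is the final sync.
static int offloadRegionSketch(DeviceTy &Device, AsyncInfoTy &AsyncInfo,
                               void *TgtIn[4], void *HstIn[4],
                               int64_t InSizes[4], void *Entry, void **Args,
                               ptrdiff_t *Offsets,
                               const KernelArgsTy &KernelArgs, void *HstOut[2],
                               void *TgtOut[2], int64_t OutSizes[2]) {
  for (int I = 0; I < 4; ++I) // H2D copies, no intermediate synchronization
    Device.submitData(TgtIn[I], HstIn[I], InSizes[I], AsyncInfo);
  Device.launchKernel(Entry, Args, Offsets, KernelArgs, AsyncInfo);
  for (int I = 0; I < 2; ++I) // D2H copies on the same stream
    Device.retrieveData(HstOut[I], TgtOut[I], OutSizes[I], AsyncInfo);
  return Device.synchronize(AsyncInfo); // the only blocking point
}
```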
|
|
|
// Submit data to device
|
2020-07-28 20:10:59 -04:00
|
|
|
int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size,
|
2021-02-10 11:06:00 -06:00
|
|
|
AsyncInfoTy &AsyncInfo) {
|
2021-06-08 16:43:59 -04:00
|
|
|
if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
|
2022-03-05 15:14:20 -06:00
|
|
|
HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
|
|
|
|
|
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
2021-06-08 16:43:59 -04:00
|
|
|
auto *HT = &*LR.Entry;
|
|
|
|
|
|
|
|
|
|
INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
|
|
|
|
|
"Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD
|
|
|
|
|
", Size=%" PRId64 ", Name=%s\n",
|
|
|
|
|
DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size,
|
|
|
|
|
(HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
|
|
|
|
|
: "unknown");
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-10 11:06:00 -06:00
|
|
|
if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize)
|
2020-04-09 22:40:30 -04:00
|
|
|
return RTL->data_submit(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size);
|
2022-07-01 11:48:15 -04:00
|
|
|
return RTL->data_submit_async(RTLDeviceID, TgtPtrBegin, HstPtrBegin, Size,
|
|
|
|
|
AsyncInfo);
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2020-04-07 14:51:56 -04:00
|
|
|
// Retrieve data from device
|
2020-07-30 21:37:01 -04:00
|
|
|
int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin,
|
2021-02-10 11:06:00 -06:00
|
|
|
int64_t Size, AsyncInfoTy &AsyncInfo) {
|
2021-06-08 16:43:59 -04:00
|
|
|
if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) {
|
2022-03-05 15:14:20 -06:00
|
|
|
HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor();
|
|
|
|
|
LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size);
|
2021-06-08 16:43:59 -04:00
|
|
|
auto *HT = &*LR.Entry;
|
|
|
|
|
INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID,
|
|
|
|
|
"Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD
|
|
|
|
|
", Size=%" PRId64 ", Name=%s\n",
|
|
|
|
|
DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size,
|
|
|
|
|
(HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str()
|
|
|
|
|
: "unknown");
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-10 11:06:00 -06:00
|
|
|
if (!RTL->data_retrieve_async || !RTL->synchronize)
|
2020-04-09 22:40:30 -04:00
|
|
|
return RTL->data_retrieve(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size);
|
2022-07-01 11:48:15 -04:00
|
|
|
return RTL->data_retrieve_async(RTLDeviceID, HstPtrBegin, TgtPtrBegin, Size,
|
|
|
|
|
AsyncInfo);
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2020-06-04 16:58:37 -04:00
|
|
|
// Copy data from current device to destination device directly
|
2020-08-19 16:07:58 -04:00
|
|
|
int32_t DeviceTy::dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr,
|
2021-02-10 11:06:00 -06:00
|
|
|
int64_t Size, AsyncInfoTy &AsyncInfo) {
|
2020-08-19 16:07:58 -04:00
|
|
|
if (!AsyncInfo || !RTL->data_exchange_async || !RTL->synchronize) {
|
2020-06-04 16:58:37 -04:00
|
|
|
assert(RTL->data_exchange && "RTL->data_exchange is nullptr");
|
|
|
|
|
return RTL->data_exchange(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID, DstPtr,
|
|
|
|
|
Size);
|
2022-07-01 11:48:15 -04:00
|
|
|
}
|
|
|
|
|
return RTL->data_exchange_async(RTLDeviceID, SrcPtr, DstDev.RTLDeviceID,
|
|
|
|
|
DstPtr, Size, AsyncInfo);
|
2020-06-04 16:58:37 -04:00
|
|
|
}
|
|
|
|
|
|
2023-01-25 01:04:07 +01:00
|
|
|
int32_t DeviceTy::notifyDataMapped(void *HstPtr, int64_t Size) {
|
|
|
|
|
if (!RTL->data_notify_mapped)
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
|
|
|
|
|
DP("Notifying about new mapping: HstPtr=" DPxMOD ", Size=%" PRId64 "\n",
|
|
|
|
|
DPxPTR(HstPtr), Size);
|
|
|
|
|
|
|
|
|
|
if (RTL->data_notify_mapped(RTLDeviceID, HstPtr, Size)) {
|
|
|
|
|
REPORT("Notifiying about data mapping failed.\n");
|
|
|
|
|
return OFFLOAD_FAIL;
|
|
|
|
|
}
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t DeviceTy::notifyDataUnmapped(void *HstPtr) {
|
|
|
|
|
if (!RTL->data_notify_unmapped)
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
|
|
|
|
|
DP("Notifying about an unmapping: HstPtr=" DPxMOD "\n", DPxPTR(HstPtr));
|
|
|
|
|
|
|
|
|
|
if (RTL->data_notify_unmapped(RTLDeviceID, HstPtr)) {
|
|
|
|
|
REPORT("Notifiying about data unmapping failed.\n");
|
|
|
|
|
return OFFLOAD_FAIL;
|
|
|
|
|
}
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-06 21:59:09 +00:00
|
|
|
// Run region on device
|
2023-01-19 13:40:58 -08:00
|
|
|
int32_t DeviceTy::launchKernel(void *TgtEntryPtr, void **TgtVarsPtr,
|
|
|
|
|
ptrdiff_t *TgtOffsets,
|
|
|
|
|
const KernelArgsTy &KernelArgs,
|
|
|
|
|
AsyncInfoTy &AsyncInfo) {
|
|
|
|
|
return RTL->launch_kernel(RTLDeviceID, TgtEntryPtr, TgtVarsPtr, TgtOffsets,
|
|
|
|
|
&KernelArgs, AsyncInfo);
|
2017-12-06 21:59:09 +00:00
|
|
|
}
|
|
|
|
|
|
2021-07-27 21:47:40 -04:00
|
|
|
// Print device info
|
|
|
|
|
bool DeviceTy::printDeviceInfo(int32_t RTLDevId) {
|
|
|
|
|
if (!RTL->print_device_info)
|
|
|
|
|
return false;
|
|
|
|
|
RTL->print_device_info(RTLDevId);
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2020-06-04 16:58:37 -04:00
|
|
|
// Whether data can be copied to DstDevice directly
|
|
|
|
|
bool DeviceTy::isDataExchangable(const DeviceTy &DstDevice) {
|
|
|
|
|
if (RTL != DstDevice.RTL || !RTL->is_data_exchangable)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
if (RTL->is_data_exchangable(RTLDeviceID, DstDevice.RTLDeviceID))
|
|
|
|
|
return (RTL->data_exchange != nullptr) ||
|
|
|
|
|
(RTL->data_exchange_async != nullptr);
|
|
|
|
|
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2021-02-10 11:06:00 -06:00
|
|
|
int32_t DeviceTy::synchronize(AsyncInfoTy &AsyncInfo) {
|
2020-07-27 16:08:19 -04:00
|
|
|
if (RTL->synchronize)
|
2021-02-10 11:06:00 -06:00
|
|
|
return RTL->synchronize(RTLDeviceID, AsyncInfo);
|
2020-07-27 16:08:19 -04:00
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2022-12-14 13:46:23 -03:00
|
|
|
int32_t DeviceTy::queryAsync(AsyncInfoTy &AsyncInfo) {
|
|
|
|
|
if (RTL->query_async)
|
|
|
|
|
return RTL->query_async(RTLDeviceID, AsyncInfo);
|
|
|
|
|
|
|
|
|
|
return synchronize(AsyncInfo);
|
|
|
|
|
}
|
|
|
|
|
|
2021-08-28 16:24:06 -04:00
|
|
|
int32_t DeviceTy::createEvent(void **Event) {
|
|
|
|
|
if (RTL->create_event)
|
|
|
|
|
return RTL->create_event(RTLDeviceID, Event);
|
|
|
|
|
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t DeviceTy::recordEvent(void *Event, AsyncInfoTy &AsyncInfo) {
|
|
|
|
|
if (RTL->record_event)
|
|
|
|
|
return RTL->record_event(RTLDeviceID, Event, AsyncInfo);
|
|
|
|
|
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t DeviceTy::waitEvent(void *Event, AsyncInfoTy &AsyncInfo) {
|
|
|
|
|
if (RTL->wait_event)
|
|
|
|
|
return RTL->wait_event(RTLDeviceID, Event, AsyncInfo);
|
|
|
|
|
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t DeviceTy::syncEvent(void *Event) {
|
|
|
|
|
if (RTL->sync_event)
|
|
|
|
|
return RTL->sync_event(RTLDeviceID, Event);
|
|
|
|
|
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int32_t DeviceTy::destroyEvent(void *Event) {
|
|
|
|
|
if (RTL->destroy_event)
|
|
|
|
|
return RTL->destroy_event(RTLDeviceID, Event);
|
|
|
|
|
|
|
|
|
|
return OFFLOAD_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
2017-12-06 21:59:12 +00:00
|
|
|
/// Check whether a device has an associated RTL and initialize it if it's not
|
|
|
|
|
/// already initialized.
|
2022-07-01 11:48:15 -04:00
|
|
|
bool deviceIsReady(int DeviceNum) {
|
|
|
|
|
DP("Checking whether device %d is ready.\n", DeviceNum);
|
2017-12-06 21:59:12 +00:00
|
|
|
// Devices.size() can only change while registering a new
|
|
|
|
|
// library, so try to acquire the lock of RTLs' mutex.
|
2022-03-02 13:34:24 -06:00
|
|
|
size_t DevicesSize;
|
|
|
|
|
{
|
|
|
|
|
std::lock_guard<decltype(PM->RTLsMtx)> LG(PM->RTLsMtx);
|
|
|
|
|
DevicesSize = PM->Devices.size();
|
|
|
|
|
}
|
2022-07-01 11:48:15 -04:00
|
|
|
if (DevicesSize <= (size_t)DeviceNum) {
|
|
|
|
|
DP("Device ID %d does not have a matching RTL\n", DeviceNum);
|
2017-12-06 21:59:12 +00:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Get device info
|
2022-07-01 11:48:15 -04:00
|
|
|
DeviceTy &Device = *PM->Devices[DeviceNum];
|
2017-12-06 21:59:12 +00:00
|
|
|
|
2022-07-01 11:48:15 -04:00
|
|
|
DP("Is the device %d (local ID %d) initialized? %d\n", DeviceNum,
|
2021-02-10 11:45:39 -06:00
|
|
|
Device.RTLDeviceID, Device.IsInit);
|
2017-12-06 21:59:12 +00:00
|
|
|
|
|
|
|
|
// Init the device if not done before
|
|
|
|
|
if (!Device.IsInit && Device.initOnce() != OFFLOAD_SUCCESS) {
|
2022-07-01 11:48:15 -04:00
|
|
|
DP("Failed to init device %d\n", DeviceNum);
|
2017-12-06 21:59:12 +00:00
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-07-01 11:48:15 -04:00
|
|
|
DP("Device %d is ready to use.\n", DeviceNum);
|
2017-12-06 21:59:12 +00:00
|
|
|
|
|
|
|
|
return true;
|
|
|
|
|
}
|